haiku.rag 0.11.4__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.


@@ -0,0 +1,71 @@
+ import logging
+ from collections import OrderedDict
+ 
+ try:
+     from fasta2a.schema import Artifact, Message, TaskState  # type: ignore
+     from fasta2a.storage import InMemoryStorage, Storage  # type: ignore
+ except ImportError as e:
+     raise ImportError(
+         "A2A support requires the 'a2a' extra. "
+         "Install with: uv pip install 'haiku.rag[a2a]'"
+     ) from e
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ class LRUMemoryStorage(Storage[list[Message]]):  # type: ignore
+     """Storage wrapper with LRU eviction for contexts.
+ 
+     Enforces a maximum context limit using LRU (Least Recently Used) eviction.
+     """
+ 
+     def __init__(self, storage: InMemoryStorage, max_contexts: int):
+         self.storage = storage
+         self.max_contexts = max_contexts
+         # Track context access order (LRU cache)
+         self.context_order: OrderedDict[str, None] = OrderedDict()
+ 
+     async def load_context(self, context_id: str) -> list[Message] | None:
+         """Load context and update access order."""
+         result = await self.storage.load_context(context_id)
+         if result is not None:
+             # Move to end (most recently used)
+             self.context_order.pop(context_id, None)
+             self.context_order[context_id] = None
+         return result
+ 
+     async def update_context(self, context_id: str, context: list[Message]) -> None:
+         """Update context and enforce LRU limit."""
+         await self.storage.update_context(context_id, context)
+         # Move to end (most recently used)
+         self.context_order.pop(context_id, None)
+         self.context_order[context_id] = None
+ 
+         # Enforce max contexts limit (LRU eviction)
+         while len(self.context_order) > self.max_contexts:
+             # Remove oldest (first item in OrderedDict)
+             oldest_context_id = next(iter(self.context_order))
+             self.context_order.pop(oldest_context_id)
+             logger.debug(
+                 f"Evicted context {oldest_context_id} (LRU, limit={self.max_contexts})"
+             )
+ 
+     async def load_task(self, task_id: str, history_length: int | None = None):
+         """Delegate to underlying storage."""
+         return await self.storage.load_task(task_id, history_length)
+ 
+     async def update_task(
+         self,
+         task_id: str,
+         state: TaskState,
+         new_artifacts: list[Artifact] | None = None,
+         new_messages: list[Message] | None = None,
+     ):
+         """Delegate to underlying storage."""
+         return await self.storage.update_task(
+             task_id, state, new_artifacts, new_messages
+         )
+ 
+     async def submit_task(self, context_id: str, message: Message):
+         """Delegate to underlying storage."""
+         return await self.storage.submit_task(context_id, message)
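
A minimal usage sketch for the new wrapper (the no-argument InMemoryStorage construction and the max_contexts value are illustrative assumptions, not taken from this diff):

# Hypothetical wiring: cap retained conversation contexts at 100.
from fasta2a.storage import InMemoryStorage

storage = LRUMemoryStorage(storage=InMemoryStorage(), max_contexts=100)

# Each load_context/update_context moves the context to the most-recently-used
# end; writing a 101st context evicts the oldest id. Note that eviction as
# written only drops the id from context_order - the entry in the wrapped
# storage itself is not deleted.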
@@ -0,0 +1,320 @@
+ import json
+ import logging
+ import uuid
+ from pathlib import Path
+ 
+ from pydantic_ai import Agent
+ 
+ from haiku.rag.a2a.context import load_message_history, save_message_history
+ from haiku.rag.a2a.models import AgentDependencies
+ from haiku.rag.a2a.skills import extract_question_from_task
+ from haiku.rag.client import HaikuRAG
+ 
+ try:
+     from fasta2a import Worker  # type: ignore
+     from fasta2a.schema import (  # type: ignore
+         Artifact,
+         Message,
+         TaskIdParams,
+         TaskSendParams,
+         TextPart,
+     )
+ except ImportError as e:
+     raise ImportError(
+         "A2A support requires the 'a2a' extra. "
+         "Install with: uv pip install 'haiku.rag[a2a]'"
+     ) from e
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ class ConversationalWorker(Worker[list[Message]]):
+     """Worker that handles conversational QA tasks."""
+ 
+     def __init__(
+         self,
+         storage,
+         broker,
+         db_path: Path,
+         agent: "Agent[AgentDependencies, str]",
+     ):
+         super().__init__(storage=storage, broker=broker)
+         self.db_path = db_path
+         self.agent = agent
+ 
+     async def run_task(self, params: TaskSendParams) -> None:
+         task = await self.storage.load_task(params["id"])
+         if task is None:
+             raise ValueError(f"Task {params['id']} not found")
+ 
+         if task["status"]["state"] != "submitted":
+             raise ValueError(
+                 f"Task {params['id']} already processed: {task['status']['state']}"
+             )
+ 
+         await self.storage.update_task(task["id"], state="working")
+ 
+         task_history = task.get("history", [])
+         question = extract_question_from_task(task_history)
+ 
+         if not question:
+             await self.storage.update_task(task["id"], state="failed")
+             return
+ 
+         try:
+             async with HaikuRAG(self.db_path) as client:
+                 context = await self.storage.load_context(task["context_id"]) or []
+                 message_history = load_message_history(context)
+ 
+                 deps = AgentDependencies(client=client)
+ 
+                 result = await self.agent.run(
+                     question, deps=deps, message_history=message_history
+                 )
+ 
+                 # Detect which skill was used
+                 skill_type = self._detect_skill(result)
+ 
+                 # Build messages based on skill type
+                 response_messages = self._build_response_messages(result, skill_type)
+ 
+                 # Update context with complete conversation state
+                 updated_history = message_history + result.new_messages()
+                 state_message = save_message_history(updated_history)
+ 
+                 await self.storage.update_context(task["context_id"], [state_message])
+ 
+                 artifacts = self.build_artifacts(result, skill_type, question)
+ 
+                 await self.storage.update_task(
+                     task["id"],
+                     state="completed",
+                     new_messages=response_messages,
+                     new_artifacts=artifacts,
+                 )
+         except Exception as e:
+             logger.error(
+                 "Task execution failed: task_id=%s, question=%s, error=%s",
+                 task["id"],
+                 question,
+                 str(e),
+                 exc_info=True,
+             )
+             await self.storage.update_task(task["id"], state="failed")
+             raise
+ 
+     async def cancel_task(self, params: TaskIdParams) -> None:
+         """Cancel a task - not implemented for this worker."""
+         pass
+ 
+     def build_message_history(self, history: list[Message]) -> list[Message]:
+         """Required by Worker interface but unused - history stored in context."""
+         return history
+ 
+     def _detect_skill(self, result) -> str:
+         """Detect which skill was used based on tool calls and response pattern.
+ 
+         Returns:
+             "search", "retrieve", or "qa"
+         """
+         from pydantic_ai.messages import ModelResponse, ToolCallPart
+ 
+         tool_calls = []
+         for msg in result.new_messages():
+             if isinstance(msg, ModelResponse):
+                 for part in msg.parts:
+                     if isinstance(part, ToolCallPart):
+                         tool_calls.append(part.tool_name)
+ 
+         # Check if output looks like formatted search results
+         output_str = str(result.output).strip()
+         # Check for either format: "Found N relevant results" or "**Search results for"
+         is_search_format = (
+             output_str.startswith("Found ") and "relevant results" in output_str[:100]
+         ) or output_str.startswith("**Search results for")
+ 
+         skill_type = "qa"
+         # If output is in search format and only search tools were used, it's a search
+         if is_search_format and all(tc == "search_documents" for tc in tool_calls):
+             skill_type = "search"
+         elif "get_full_document" in tool_calls and len(tool_calls) == 1:
+             skill_type = "retrieve"
+ 
+         return skill_type
+ 
+     def _build_response_messages(self, result, skill_type: str) -> list[Message]:
+         """Build response messages based on skill type.
+ 
+         All skills return a single text message with the LLM's response.
+         Structured data is provided via artifacts for search/retrieve.
+         """
+         if skill_type == "search":
+             # Return LLM's formatted response
+             return [
+                 Message(
+                     role="agent",
+                     parts=[TextPart(kind="text", text=str(result.output))],
+                     kind="message",
+                     message_id=str(uuid.uuid4()),
+                 )
+             ]
+         elif skill_type == "retrieve":
+             # Extract document content
+             from pydantic_ai.messages import ModelRequest, ToolReturnPart
+ 
+             document_content = ""
+             for msg in result.new_messages():
+                 if isinstance(msg, ModelRequest):
+                     for part in msg.parts:
+                         if (
+                             isinstance(part, ToolReturnPart)
+                             and part.tool_name == "get_full_document"
+                         ):
+                             document_content = part.content
+                             break
+ 
+             return [
+                 Message(
+                     role="agent",
+                     parts=[TextPart(kind="text", text=document_content)],
+                     kind="message",
+                     message_id=str(uuid.uuid4()),
+                 )
+             ]
+         else:
+             # Conversational Q&A - use agent's answer
+             return [
+                 Message(
+                     role="agent",
+                     parts=[TextPart(kind="text", text=str(result.output))],
+                     kind="message",
+                     message_id=str(uuid.uuid4()),
+                 )
+             ]
+ 
+     def build_artifacts(
+         self, result, skill_type: str | None = None, question: str | None = None
+     ) -> list[Artifact]:
+         """Build artifacts from agent result based on tool calls.
+ 
+         Creates artifacts for:
+         - Each tool call (search_documents, get_full_document)
+         - Q&A operations: additional artifact with question and answer (only if tools were used)
+         """
+         if skill_type is None:
+             skill_type = self._detect_skill(result)
+ 
+         artifacts = []
+ 
+         # Always create artifacts for all tool calls
+         tool_artifacts = self._build_all_tool_artifacts(result)
+         artifacts.extend(tool_artifacts)
+ 
+         # For Q&A, always add a Q&A artifact with question and answer
+         # This includes follow-up questions, clarifications, and conversational responses
+         if skill_type == "qa" and question:
+             from fasta2a.schema import DataPart
+ 
+             artifacts.append(
+                 Artifact(
+                     artifact_id=str(uuid.uuid4()),
+                     name="qa_result",
+                     parts=[
+                         DataPart(
+                             kind="data",
+                             data={
+                                 "question": question,
+                                 "answer": str(result.output),
+                                 "skill": "document-qa",
+                             },
+                             metadata={"skill": "document-qa"},
+                         )
+                     ],
+                 )
+             )
+ 
+         return artifacts
+ 
+     def _build_all_tool_artifacts(self, result) -> list[Artifact]:
+         """Build artifacts for all tool calls."""
+         from pydantic_ai.messages import (
+             ModelRequest,
+             ModelResponse,
+             ToolCallPart,
+             ToolReturnPart,
+         )
+ 
+         artifacts = []
+ 
+         # Track tool calls and their returns by call_id
+         tool_returns = {}
+         for msg in result.new_messages():
+             if isinstance(msg, ModelRequest):
+                 for part in msg.parts:
+                     if isinstance(part, ToolReturnPart):
+                         result_count = (
+                             len(part.content) if isinstance(part.content, list) else 1
+                         )
+                         logger.info(
+                             "Tool return: tool_call_id=%s, tool_name=%s, result_count=%s",
+                             part.tool_call_id,
+                             part.tool_name,
+                             result_count,
+                         )
+                         tool_returns[part.tool_call_id] = (part.tool_name, part.content)
+ 
+         # Create artifacts for each tool call
+         for msg in result.new_messages():
+             if isinstance(msg, ModelResponse):
+                 for part in msg.parts:
+                     if isinstance(part, ToolCallPart):
+                         tool_name, content = tool_returns.get(
+                             part.tool_call_id, (None, None)
+                         )
+ 
+                         if tool_name == "search_documents" and content:
+                             from fasta2a.schema import DataPart
+ 
+                             # Extract query from tool call arguments
+                             query = ""
+                             if isinstance(part.args, dict):
+                                 query = part.args.get("query", "")
+                             elif isinstance(part.args, str):
+                                 # Args is a JSON string - parse it
+                                 try:
+                                     args_dict = json.loads(part.args)
+                                     query = args_dict.get("query", "")
+                                 except (json.JSONDecodeError, AttributeError):
+                                     query = ""
+                             elif hasattr(part.args, "get") and callable(
+                                 getattr(part.args, "get", None)
+                             ):
+                                 # ArgsDict or dict-like object
+                                 query = part.args.get("query", "")  # type: ignore
+                             elif hasattr(part.args, "query"):
+                                 # Object with query attribute
+                                 query = str(part.args.query)  # type: ignore
+ 
+                             artifacts.append(
+                                 Artifact(
+                                     artifact_id=str(uuid.uuid4()),
+                                     name="search_results",
+                                     parts=[
+                                         DataPart(
+                                             kind="data",
+                                             data={"results": content, "query": query},
+                                             metadata={"query": query},
+                                         )
+                                     ],
+                                 )
+                             )
+                         elif tool_name == "get_full_document" and content:
+                             artifacts.append(
+                                 Artifact(
+                                     artifact_id=str(uuid.uuid4()),
+                                     name="document",
+                                     parts=[TextPart(kind="text", text=content)],
+                                 )
+                             )
+ 
+         return artifacts
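
A hedged construction sketch for this worker (the InMemoryBroker import path, the database path, and qa_agent are assumptions; the real wiring lives in haiku.rag's A2A app factory, which is not part of this hunk):

from pathlib import Path

from fasta2a.broker import InMemoryBroker  # assumed import path - verify against fasta2a
from fasta2a.storage import InMemoryStorage

worker = ConversationalWorker(
    storage=LRUMemoryStorage(storage=InMemoryStorage(), max_contexts=100),
    broker=InMemoryBroker(),
    db_path=Path("rag.db"),  # illustrative path
    agent=qa_agent,  # a pydantic_ai Agent[AgentDependencies, str], built elsewhere
)

run_task then drives each task through submitted -> working -> completed (or failed), emitting response messages plus artifacts keyed to the detected skill.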
haiku/rag/app.py CHANGED
@@ -1,5 +1,6 @@
  import asyncio
  import json
+ import logging
  from importlib.metadata import version as pkg_version
  from pathlib import Path
  
@@ -22,6 +23,8 @@ from haiku.rag.research.stream import stream_research_graph
  from haiku.rag.store.models.chunk import Chunk
  from haiku.rag.store.models.document import Document
  
+ logger = logging.getLogger(__name__)
+ 
  
  class HaikuRAGApp:
      def __init__(self, db_path: Path):
@@ -157,13 +160,20 @@ class HaikuRAGApp:
          self, source: str, title: str | None = None, metadata: dict | None = None
      ):
          async with HaikuRAG(db_path=self.db_path) as self.client:
-             doc = await self.client.create_document_from_source(
+             result = await self.client.create_document_from_source(
                  source, title=title, metadata=metadata
              )
-             self._rich_print_document(doc, truncate=True)
-             self.console.print(
-                 f"[bold green]Document {doc.id} added successfully.[/bold green]"
-             )
+             if isinstance(result, list):
+                 for doc in result:
+                     self._rich_print_document(doc, truncate=True)
+                 self.console.print(
+                     f"[bold green]{len(result)} documents added successfully.[/bold green]"
+                 )
+             else:
+                 self._rich_print_document(result, truncate=True)
+                 self.console.print(
+                     f"[bold green]Document {result.id} added successfully.[/bold green]"
+                 )
  
      async def get_document(self, doc_id: str):
          async with HaikuRAG(db_path=self.db_path) as self.client:
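
Since create_document_from_source can now return either a single Document or a list of them, callers outside the CLI need the same shape check the CLI performs above. A minimal caller-side sketch (the source path and helper name are illustrative):

# Normalize the single-or-list return shape before iterating.
async def add_and_list_ids(client: HaikuRAG) -> list:
    result = await client.create_document_from_source("docs/")  # illustrative source
    docs = result if isinstance(result, list) else [result]
    return [doc.id for doc in docs]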
@@ -448,23 +458,81 @@ class HaikuRAGApp:
          self.console.print(content)
          self.console.rule()
  
-     async def serve(self, transport: str | None = None):
-         """Start the MCP server."""
+     async def serve(
+         self,
+         enable_monitor: bool = True,
+         enable_mcp: bool = True,
+         mcp_transport: str | None = None,
+         mcp_port: int = 8001,
+         enable_a2a: bool = False,
+         a2a_host: str = "127.0.0.1",
+         a2a_port: int = 8000,
+     ):
+         """Start the server with selected services."""
          async with HaikuRAG(self.db_path) as client:
-             monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
-             monitor_task = asyncio.create_task(monitor.observe())
-             server = create_mcp_server(self.db_path)
+             tasks = []
+ 
+             # Start file monitor if enabled
+             if enable_monitor:
+                 monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
+                 monitor_task = asyncio.create_task(monitor.observe())
+                 tasks.append(monitor_task)
+ 
+             # Start MCP server if enabled
+             if enable_mcp:
+                 server = create_mcp_server(self.db_path)
+ 
+                 async def run_mcp():
+                     if mcp_transport == "stdio":
+                         await server.run_stdio_async()
+                     else:
+                         logger.info(f"Starting MCP server on port {mcp_port}")
+                         await server.run_http_async(
+                             transport="streamable-http", port=mcp_port
+                         )
+ 
+                 mcp_task = asyncio.create_task(run_mcp())
+                 tasks.append(mcp_task)
+ 
+             # Start A2A server if enabled
+             if enable_a2a:
+                 try:
+                     from haiku.rag.a2a import create_a2a_app
+                 except ImportError as e:
+                     logger.error(f"Failed to import A2A: {e}")
+                     return
+ 
+                 import uvicorn
+ 
+                 logger.info(f"Starting A2A server on {a2a_host}:{a2a_port}")
+ 
+                 async def run_a2a():
+                     app = create_a2a_app(db_path=self.db_path)
+                     config = uvicorn.Config(
+                         app,
+                         host=a2a_host,
+                         port=a2a_port,
+                         log_level="warning",
+                         access_log=False,
+                     )
+                     server = uvicorn.Server(config)
+                     await server.serve()
+ 
+                 a2a_task = asyncio.create_task(run_a2a())
+                 tasks.append(a2a_task)
+ 
+             if not tasks:
+                 logger.warning("No services enabled")
+                 return
  
              try:
-                 if transport == "stdio":
-                     await server.run_stdio_async()
-                 else:
-                     await server.run_http_async(transport="streamable-http")
+                 # Wait for the tasks to complete (or KeyboardInterrupt)
+                 await asyncio.gather(*tasks)
              except KeyboardInterrupt:
                  pass
              finally:
-                 monitor_task.cancel()
-                 try:
-                     await monitor_task
-                 except asyncio.CancelledError:
-                     pass
+                 # Cancel all tasks
+                 for task in tasks:
+                     task.cancel()
+                 # Wait for cancellation
+                 await asyncio.gather(*tasks, return_exceptions=True)
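
Under the reworked signature each service is opt-in; an invocation sketch grounded in the parameters above (the path is illustrative, and the CLI flag plumbing that normally calls serve() is not part of this hunk):

import asyncio
from pathlib import Path

app = HaikuRAGApp(db_path=Path("rag.db"))  # illustrative path

# Run the file monitor, MCP over streamable HTTP on port 8001, and A2A on 8000.
asyncio.run(
    app.serve(
        enable_monitor=True,
        enable_mcp=True,
        mcp_port=8001,
        enable_a2a=True,
        a2a_host="127.0.0.1",
        a2a_port=8000,
    )
)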