PyPI - haiku.rag - Versions diffs - 0.11.4__py3-none-any.whl → 0.12.1__py3-none-any.whl - Mend

haiku.rag 0.11.4__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of haiku.rag might be problematic. Click here for more details.

Files changed (24) hide show

haiku/rag/a2a/__init__.py +176 -0
haiku/rag/a2a/client.py +268 -0
haiku/rag/a2a/context.py +68 -0
haiku/rag/a2a/models.py +21 -0
haiku/rag/a2a/prompts.py +59 -0
haiku/rag/a2a/skills.py +75 -0
haiku/rag/a2a/storage.py +71 -0
haiku/rag/a2a/worker.py +320 -0
haiku/rag/app.py +87 -19
haiku/rag/cli.py +81 -71
haiku/rag/client.py +47 -4
haiku/rag/config.py +4 -0
haiku/rag/embeddings/base.py +8 -0
haiku/rag/embeddings/ollama.py +8 -0
haiku/rag/embeddings/openai.py +8 -0
haiku/rag/embeddings/vllm.py +8 -0
haiku/rag/embeddings/voyageai.py +8 -0
haiku/rag/mcp.py +99 -0
haiku/rag/qa/agent.py +0 -3
{haiku_rag-0.11.4.dist-info → haiku_rag-0.12.1.dist-info}/METADATA +33 -10
{haiku_rag-0.11.4.dist-info → haiku_rag-0.12.1.dist-info}/RECORD +24 -16
{haiku_rag-0.11.4.dist-info → haiku_rag-0.12.1.dist-info}/WHEEL +0 -0
{haiku_rag-0.11.4.dist-info → haiku_rag-0.12.1.dist-info}/entry_points.txt +0 -0
{haiku_rag-0.11.4.dist-info → haiku_rag-0.12.1.dist-info}/licenses/LICENSE +0 -0

haiku/rag/a2a/__init__.py ADDED Viewed

@@ -0,0 +1,176 @@
+import logging
+from contextlib import asynccontextmanager
+from pathlib import Path
+import logfire
+from pydantic_ai import Agent, RunContext
+from haiku.rag.config import Config
+from haiku.rag.graph.common import get_model
+from .context import load_message_history, save_message_history
+from .models import AgentDependencies, SearchResult
+from .prompts import A2A_SYSTEM_PROMPT
+from .skills import extract_question_from_task, get_agent_skills
+from .storage import LRUMemoryStorage
+from .worker import ConversationalWorker
+try:
+    from fasta2a import FastA2A  # type: ignore
+    from fasta2a.broker import InMemoryBroker  # type: ignore
+    from fasta2a.storage import InMemoryStorage  # type: ignore
+except ImportError as e:
+    raise ImportError(
+        "A2A support requires the 'a2a' extra. "
+        "Install with: uv pip install 'haiku.rag[a2a]'"
+    ) from e
+logfire.configure(send_to_logfire="if-token-present", service_name="a2a")  # import-time side effect: configures logfire under service name "a2a"
+logfire.instrument_pydantic_ai()  # import-time side effect: instruments pydantic-ai through logfire
+logger = logging.getLogger(__name__)
+__all__ = [  # names re-exported as this package's public API
+    "create_a2a_app",
+    "load_message_history",
+    "save_message_history",
+    "extract_question_from_task",
+    "get_agent_skills",
+    "LRUMemoryStorage",
+]
+def create_a2a_app(
+    db_path: Path,
+    security_schemes: dict | None = None,
+    security: list[dict[str, list[str]]] | None = None,
+):
+    """Create an A2A app for the conversational QA agent.
+    Args:
+        db_path: Path to the LanceDB database
+        security_schemes: Optional security scheme definitions for the AgentCard
+        security: Optional security requirements for the AgentCard
+    Returns:
+        A FastA2A ASGI application
+    """
+    base_storage = InMemoryStorage()
+    storage = LRUMemoryStorage(
+        storage=base_storage, max_contexts=Config.A2A_MAX_CONTEXTS
+    )
+    broker = InMemoryBroker()
+    # Create the agent with native search tool
+    model = get_model(Config.QA_PROVIDER, Config.QA_MODEL)
+    agent = Agent(
+        model=model,
+        deps_type=AgentDependencies,
+        system_prompt=A2A_SYSTEM_PROMPT,
+        retries=3,
+    )
+    @agent.tool
+    async def search_documents(
+        ctx: RunContext[AgentDependencies],
+        query: str,
+        limit: int = 3,
+    ) -> list[SearchResult]:
+        """Search the knowledge base for relevant documents.
+        Returns chunks of text with their relevance scores and document URIs.
+        Use get_full_document if you need to see the complete document content.
+        """
+        search_results = await ctx.deps.client.search(query, limit=limit)
+        expanded_results = await ctx.deps.client.expand_context(search_results)
+        return [
+            SearchResult(
+                content=chunk.content,
+                score=score,
+                document_title=chunk.document_title,
+                document_uri=(chunk.document_uri or ""),
+            )
+            for chunk, score in expanded_results
+        ]
+    @agent.tool
+    async def get_full_document(
+        ctx: RunContext[AgentDependencies],
+        document_uri: str,
+    ) -> str:
+        """Retrieve the complete content of a document by its URI.
+        Use this when you need more context than what's in a search result chunk.
+        The document_uri comes from search_documents results.
+        """
+        document = await ctx.deps.client.get_document_by_uri(document_uri)
+        if document is None:
+            return f"Document not found: {document_uri}"
+        return document.content
+    worker = ConversationalWorker(
+        storage=storage,
+        broker=broker,
+        db_path=db_path,
+        agent=agent,  # type: ignore
+    )
+    # Create FastA2A app with custom worker lifecycle
+    @asynccontextmanager
+    async def lifespan(app):
+        logger.info(f"Started A2A server (max contexts: {Config.A2A_MAX_CONTEXTS})")
+        async with app.task_manager:
+            async with worker.run():
+                yield
+    app = FastA2A(
+        storage=storage,
+        broker=broker,
+        name="haiku-rag",
+        description="Conversational question answering agent powered by haiku.rag RAG system",
+        skills=get_agent_skills(),
+        lifespan=lifespan,
+    )
+    # Add security configuration if provided
+    if security_schemes or security:
+        # Monkey-patch the agent card endpoint to include security
+        async def _agent_card_endpoint_with_security(request):
+            from fasta2a.schema import AgentCapabilities, AgentCard, agent_card_ta
+            from starlette.responses import Response
+            if app._agent_card_json_schema is None:
+                agent_card = AgentCard(
+                    name=app.name,
+                    description=app.description
+                    or "An AI agent exposed as an A2A agent.",
+                    url=app.url,
+                    version=app.version,
+                    protocol_version="0.3.0",
+                    skills=app.skills,
+                    default_input_modes=app.default_input_modes,
+                    default_output_modes=app.default_output_modes,
+                    capabilities=AgentCapabilities(
+                        streaming=False,
+                        push_notifications=False,
+                        state_transition_history=False,
+                    ),
+                )
+                if app.provider is not None:
+                    agent_card["provider"] = app.provider
+                if security_schemes:
+                    agent_card["security_schemes"] = security_schemes
+                if security:
+                    agent_card["security"] = security
+                app._agent_card_json_schema = agent_card_ta.dump_json(
+                    agent_card, by_alias=True
+                )
+            return Response(
+                content=app._agent_card_json_schema, media_type="application/json"
+            )
+        app._agent_card_endpoint = _agent_card_endpoint_with_security
+    return app

haiku/rag/a2a/client.py ADDED Viewed

@@ -0,0 +1,268 @@
+import asyncio
+import uuid
+from typing import Any
+import httpx
+from rich.console import Console
+from rich.markdown import Markdown
+from rich.prompt import Prompt
+try:
+    from fasta2a.client import A2AClient as FastA2AClient
+    from fasta2a.schema import Message, TextPart
+except ImportError as e:
+    raise ImportError(
+        "A2A support requires the 'a2a' extra. "
+        "Install with: uv pip install 'haiku.rag[a2a]'"
+    ) from e
+class A2AClient:
+    """Interactive A2A protocol client."""
+    def __init__(self, base_url: str = "http://localhost:8000"):
+        """Initialize A2A client.
+        Args:
+            base_url: Base URL of the A2A server
+        """
+        self.base_url = base_url.rstrip("/")
+        http_client = httpx.AsyncClient(timeout=60.0)
+        self._client = FastA2AClient(base_url=base_url, http_client=http_client)
+    async def close(self):
+        """Close the HTTP client."""
+        await self._client.http_client.aclose()
+    async def get_agent_card(self) -> dict[str, Any]:
+        """Fetch the agent card from the A2A server.
+        Returns:
+            Agent card dictionary with agent capabilities and metadata
+        """
+        response = await self._client.http_client.get(
+            f"{self.base_url}/.well-known/agent-card.json"
+        )
+        response.raise_for_status()
+        return response.json()
+    async def send_message(
+        self,
+        text: str,
+        context_id: str | None = None,
+        skill_id: str | None = None,
+    ) -> dict[str, Any]:
+        """Send a message to the A2A agent and wait for completion.
+        Args:
+            text: Message text to send
+            context_id: Optional conversation context ID (creates new if None)
+            skill_id: Optional skill ID to use (defaults to document-qa)
+        Returns:
+            Completed task with response messages and artifacts
+        """
+        if context_id is None:
+            context_id = str(uuid.uuid4())
+        message = Message(
+            kind="message",
+            role="user",
+            message_id=str(uuid.uuid4()),
+            parts=[TextPart(kind="text", text=text)],
+        )
+        metadata: dict[str, Any] = {"contextId": context_id}
+        if skill_id:
+            metadata["skillId"] = skill_id
+        response = await self._client.send_message(message, metadata=metadata)
+        if "error" in response:
+            return {"error": response["error"]}
+        result = response.get("result")
+        if not result:
+            return {"result": result}
+        # Result can be either Task or Message - check if it's a Task with an id
+        if result.get("kind") == "task":
+            task_id = result.get("id")
+            if task_id:
+                # Poll for task completion
+                return await self.wait_for_task(task_id)
+        # Return the message directly
+        return {"result": result}
+    async def wait_for_task(
+        self, task_id: str, max_wait: int = 120, poll_interval: float = 0.5
+    ) -> dict[str, Any]:
+        """Poll for task completion.
+        Args:
+            task_id: Task ID to poll for
+            max_wait: Maximum time to wait in seconds
+            poll_interval: Interval between polls in seconds
+        Returns:
+            Completed task result
+        """
+        import time
+        start_time = time.time()
+        while time.time() - start_time < max_wait:
+            task_response = await self._client.get_task(task_id)
+            if "error" in task_response:
+                return {"error": task_response["error"]}
+            task = task_response.get("result")
+            if not task:
+                raise Exception("No task in response")
+            state = task.get("status", {}).get("state")
+            if state == "completed":
+                return {"result": task}
+            elif state == "failed":
+                raise Exception(f"Task failed: {task}")
+            await asyncio.sleep(poll_interval)
+        raise TimeoutError(f"Task {task_id} did not complete within {max_wait}s")
+def print_agent_card(card: dict[str, Any], console: Console):
+    """Pretty print the agent card using Rich."""
+    console.print()
+    console.print("[bold]Agent Card[/bold]")
+    console.rule()
+    console.print(f"  [repr.attrib_name]name[/repr.attrib_name]: {card.get('name')}")
+    console.print(
+        f"  [repr.attrib_name]description[/repr.attrib_name]: {card.get('description')}"
+    )
+    console.print(
+        f"  [repr.attrib_name]version[/repr.attrib_name]: {card.get('version')}"
+    )
+    console.print(
+        f"  [repr.attrib_name]protocol version[/repr.attrib_name]: {card.get('protocolVersion')}"
+    )
+    skills = card.get("skills", [])
+    console.print(f"\n[bold cyan]Skills ({len(skills)}):[/bold cyan]")
+    for skill in skills:
+        console.print(f"  • {skill.get('id')}: {skill.get('name')}")
+        console.print(f"    [dim]{skill.get('description')}[/dim]")
+        examples = skill.get("examples", [])
+        if examples:
+            console.print(f"    [dim]Examples: {', '.join(examples[:2])}[/dim]")
+    console.print()
+def print_response(response: dict[str, Any], console: Console):
+    """Pretty print the A2A response using Rich."""
+    if "error" in response:
+        console.print(f"[red]Error: {response['error']}[/red]")
+        return
+    result = response.get("result", {})
+    # Get messages from history and artifacts from completed task
+    history = result.get("history", [])
+    artifacts = result.get("artifacts", [])
+    # Print agent messages from history with markdown rendering
+    for msg in history:
+        if msg.get("role") == "agent":
+            for part in msg.get("parts", []):
+                if part.get("kind") == "text":
+                    text = part.get("text", "")
+                    # Render as markdown
+                    console.print()
+                    console.print("[bold green]Answer:[/bold green]")
+                    console.print(Markdown(text))
+    # Print artifacts summary with details
+    if artifacts:
+        console.rule("[dim]Artifacts generated[/dim]")
+        summary_lines = []
+        for artifact in artifacts:
+            name = artifact.get("name", "")
+            parts = artifact.get("parts", [])
+            if name == "search_results" and parts:
+                data = parts[0].get("data", {})
+                query = data.get("query", "")
+                results = data.get("results", [])
+                summary_lines.append(f"🔍 search: '{query}' ({len(results)} results)")
+            elif name == "document" and parts:
+                part = parts[0]
+                if part.get("kind") == "text":
+                    text = part.get("text", "")
+                    length = len(text)
+                    summary_lines.append(f"📄 document ({length} chars)")
+            elif name == "qa_result" and parts:
+                data = parts[0].get("data", {})
+                skill = data.get("skill", "unknown")
+                summary_lines.append(f"💬 {skill}")
+        if summary_lines:
+            console.print(f"[dim]{' • '.join(summary_lines)}[/dim]")
+    console.print()
+async def run_interactive_client(url: str = "http://localhost:8000"):
+    """Run the interactive A2A client.
+    Args:
+        url: Base URL of the A2A server
+    """
+    console = Console()
+    client = A2AClient(url)
+    console.print("[bold]haiku.rag A2A interactive client[/bold]")
+    console.print()
+    # Fetch and display agent card
+    console.print("[dim]Fetching agent card...[/dim]")
+    try:
+        card = await client.get_agent_card()
+        print_agent_card(card, console)
+    except Exception as e:
+        console.print(f"[red]Error fetching agent card: {e}[/red]")
+        await client.close()
+        return
+    # Create a conversation context
+    context_id = str(uuid.uuid4())
+    console.print(f"[dim]context id: {context_id}[/dim]")
+    console.print("[dim]Type your questions (or 'quit' to exit)[/dim]\n")
+    try:
+        while True:
+            try:
+                question = Prompt.ask("[bold blue]Question[/bold blue]").strip()
+                if not question:
+                    continue
+                if question.lower() in ("quit", "exit", "q"):
+                    console.print("\n[dim]Goodbye![/dim]")
+                    break
+                response = await client.send_message(question, context_id=context_id)
+                print_response(response, console)
+            except KeyboardInterrupt:
+                console.print("\n\n[dim]Exiting...[/dim]")
+                break
+            except Exception as e:
+                console.print(f"\n[red]Error: {e}[/red]\n")
+    finally:
+        await client.close()

haiku/rag/a2a/context.py ADDED Viewed

@@ -0,0 +1,68 @@
+import uuid
+from pydantic import TypeAdapter
+from pydantic_ai.messages import ModelMessage
+from pydantic_core import to_jsonable_python
+try:
+    from fasta2a.schema import DataPart, Message  # type: ignore
+except ImportError as e:
+    raise ImportError(
+        "A2A support requires the 'a2a' extra. "
+        "Install with: uv pip install 'haiku.rag[a2a]'"
+    ) from e
+ModelMessagesTypeAdapter = TypeAdapter(list[ModelMessage])  # module-level adapter used by load_message_history to validate stored history
+def load_message_history(context: list[Message]) -> list[ModelMessage]:
+    """Load pydantic-ai message history from A2A context.
+    The context stores serialized pydantic-ai message history directly,
+    which we deserialize and return.
+    Args:
+        context: A2A context messages
+    Returns:
+        List of pydantic-ai ModelMessage objects
+    """
+    if not context:
+        return []
+    # Context should contain a single "state" message with full history
+    for msg in context:
+        parts = msg.get("parts", [])
+        for part in parts:
+            if part.get("kind") == "data":
+                metadata = part.get("metadata", {})
+                if metadata.get("type") == "conversation_state":
+                    stored_history = part.get("data", {}).get("message_history", [])
+                    if stored_history:
+                        return ModelMessagesTypeAdapter.validate_python(stored_history)
+    return []
+def save_message_history(message_history: list[ModelMessage]) -> Message:
+    """Save pydantic-ai message history to A2A context format.
+    Args:
+        message_history: Full pydantic-ai message history
+    Returns:
+        A2A Message containing the serialized state (stored as agent role)
+    """
+    serialized = to_jsonable_python(message_history)
+    return Message(
+        role="agent",
+        parts=[
+            DataPart(
+                kind="data",
+                data={"message_history": serialized},
+                metadata={"type": "conversation_state"},
+            )
+        ],
+        kind="message",
+        message_id=str(uuid.uuid4()),
+    )

haiku/rag/a2a/models.py ADDED Viewed

@@ -0,0 +1,21 @@
+from pydantic import BaseModel, Field
+from haiku.rag.client import HaikuRAG
+class SearchResult(BaseModel):
+    """Search result with both title and URI for A2A agent."""
+    content: str = Field(description="The document text content")
+    score: float = Field(description="Relevance score (higher is more relevant)")
+    document_title: str | None = Field(
+        description="Human-readable document title", default=None
+    )
+    document_uri: str = Field(description="Document URI/path for get_full_document")
+class AgentDependencies(BaseModel):
+    """Dependencies for the A2A conversational agent."""
+    model_config = {"arbitrary_types_allowed": True}
+    client: HaikuRAG

haiku/rag/a2a/prompts.py ADDED Viewed

@@ -0,0 +1,59 @@
+A2A_SYSTEM_PROMPT = """You are Haiku.rag, an AI assistant that helps users find information from a document knowledge base.
+IMPORTANT: You are NOT any person mentioned in the documents. You retrieve and present information about them.
+Tools available:
+- search_documents: Query for relevant text chunks (returns SearchResult objects with content, score, document_title, document_uri)
+- get_full_document: Get complete document content by document_uri
+Your behavior depends on the operation:
+## For direct search requests:
+When the user is explicitly searching (e.g., "search for X", "find documents about Y"):
+- Use search_documents tool ONLY
+- Format results as a numbered list using markdown formatting
+- For each result show:
+  * First line: *Score in italic* | **source in bold** (title if available, otherwise URI)
+  * Second line: The FULL chunk content (do not summarize or truncate)
+- Present results in order of relevance
+- Be concise - just present the search results, do not synthesize or add commentary
+Example format:
+Found 3 relevant results:
+1. *Score: 0.95* | **Python Documentation** (/guides/python.md)
+Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.
+2. *Score: 0.87* | **/guides/python-basics.md**
+Python supports multiple programming paradigms, including structured, object-oriented and functional programming.
+## For question-answering:
+When the user asks a question (e.g., "What is Python?", "How does X work?"):
+- For complex questions, use search_documents MULTIPLE TIMES with DIFFERENT queries to gather comprehensive information
+- Example: For "What are the benefits and drawbacks of Python?", search separately for:
+  * "Python benefits advantages"
+  * "Python drawbacks disadvantages limitations"
+- Synthesize information from all searches into a comprehensive answer
+- Include "Sources:" section at the end listing sources used
+Sources Format:
+List each source with its title/URI and the relevant chunk content (NOT the score).
+Format: "- **[title or URI]**: [chunk content]"
+Example:
+[Your synthesized answer here]
+Sources:
+- **Python Documentation** (/guides/python.md): Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability.
+- **/guides/python-basics.md**: Python supports multiple programming paradigms, including structured, object-oriented and functional programming.
+Critical rules:
+- ONLY answer based on information found via search_documents
+- For comprehensive questions, perform MULTIPLE searches with different query angles
+- NEVER fabricate or assume information
+- If not found, say: "I cannot find information about this in the knowledge base."
+- For follow-ups, understand context (pronouns like "he", "it") but always search for facts
+- In Sources, include the actual chunk content from your search results, not summaries
+Note: When using get_full_document, always use document_uri (not document_title).
+"""  # system prompt for the A2A agent; the tool names in it must match the tools registered on the agent

haiku/rag/a2a/skills.py ADDED Viewed

@@ -0,0 +1,75 @@
+try:
+    from fasta2a.schema import Message, Skill  # type: ignore
+except ImportError as e:
+    raise ImportError(
+        "A2A support requires the 'a2a' extra. "
+        "Install with: uv pip install 'haiku.rag[a2a]'"
+    ) from e
+def get_agent_skills() -> list[Skill]:
+    """Define the skills exposed by the haiku.rag A2A agent.
+    Returns:
+        List of skills describing the agent's capabilities
+    """
+    return [
+        Skill(
+            id="document-qa",
+            name="Document Question Answering",
+            description="Answer questions based on a knowledge base of documents using semantic search and retrieval",
+            tags=["question-answering", "search", "knowledge-base", "rag"],
+            input_modes=["application/json"],
+            output_modes=["application/json"],
+            examples=[
+                "What does the documentation say about authentication?",
+                "Find information about Python best practices",
+                "Show me the full API documentation",
+            ],
+        ),
+        Skill(
+            id="document-search",
+            name="Document Search",
+            description="Search for relevant document chunks in the knowledge base using hybrid (semantic and BM25) search",
+            tags=["search", "retrieval", "semantic-search"],
+            input_modes=["application/json"],
+            output_modes=["application/json"],
+            examples=[
+                "Search for Python best practices",
+                "Find documents about authentication",
+                "Look for API documentation",
+            ],
+        ),
+        Skill(
+            id="document-retrieve",
+            name="Document Retrieval",
+            description="Retrieve the complete content of a specific document by its URI",
+            tags=["retrieval", "fetch", "document"],
+            input_modes=["application/json"],
+            output_modes=["application/json"],
+            examples=[
+                "Get the full content of document X",
+                "Retrieve document by URI",
+                "Show me the complete document",
+            ],
+        ),
+    ]
+def extract_question_from_task(task_history: list[Message]) -> str | None:
+    """Extract the user's question from task history.
+    Args:
+        task_history: Task history messages
+    Returns:
+        The question text if found, None otherwise
+    """
+    for msg in task_history:
+        if msg.get("role") == "user":
+            for part in msg.get("parts", []):
+                if part.get("kind") == "text":
+                    text = part.get("text", "").strip()
+                    if text:
+                        return text
+    return None

haiku.rag 0.11.4__py3-none-any.whl → 0.12.1__py3-none-any.whl

Potentially problematic release.

haiku.rag 0.11.4__py3-none-any.whl → 0.12.1__py3-none-any.whl