PyPI - haiku.rag - Versions diffs - 0.11.4__tar.gz → 0.12.0__tar.gz - Mend

haiku.rag 0.11.4__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of haiku.rag might be problematic. Click here for more details.

Files changed (90) hide show

{haiku_rag-0.11.4 → haiku_rag-0.12.0}/.gitignore RENAMED Viewed

@@ -21,3 +21,7 @@ tests/data/
 TODO.md
 PLAN.md
 DEVNOTES.md
+# mcp registry
+.mcpregistry_github_token
+.mcpregistry_registry_token

haiku_rag-0.12.0/.python-version ADDED Viewed

@@ -0,0 +1 @@
+3.13

{haiku_rag-0.11.4 → haiku_rag-0.12.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.11.4
+Version: 0.12.0
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -18,18 +18,20 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
 Requires-Python: >=3.12
-Requires-Dist: docling>=2.52.0
-Requires-Dist: fastmcp>=2.12.3
+Requires-Dist: docling>=2.56.1
+Requires-Dist: fastmcp>=2.12.4
 Requires-Dist: httpx>=0.28.1
-Requires-Dist: lancedb>=0.25.0
-Requires-Dist: pydantic-ai>=1.0.8
-Requires-Dist: pydantic-graph>=1.0.8
-Requires-Dist: pydantic>=2.11.9
+Requires-Dist: lancedb>=0.25.2
+Requires-Dist: pydantic-ai>=1.0.18
+Requires-Dist: pydantic-graph>=1.0.18
+Requires-Dist: pydantic>=2.12.1
 Requires-Dist: python-dotenv>=1.1.1
-Requires-Dist: rich>=14.1.0
-Requires-Dist: tiktoken>=0.11.0
-Requires-Dist: typer>=0.16.1
+Requires-Dist: rich>=14.2.0
+Requires-Dist: tiktoken>=0.12.0
+Requires-Dist: typer>=0.19.2
 Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: a2a
+Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: voyageai
@@ -56,6 +58,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 ## Quick Start
@@ -181,6 +184,24 @@ haiku-rag serve --stdio
 Provides tools for document management and search directly in your AI assistant.
+## A2A Agent
+Run as a conversational agent with the Agent-to-Agent protocol:
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
 ## Documentation
 Full documentation at: https://ggozad.github.io/haiku.rag/

{haiku_rag-0.11.4 → haiku_rag-0.12.0}/README.md RENAMED Viewed

@@ -18,6 +18,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 ## Quick Start
@@ -143,6 +144,24 @@ haiku-rag serve --stdio
 Provides tools for document management and search directly in your AI assistant.
+## A2A Agent
+Run as a conversational agent with the Agent-to-Agent protocol:
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
 ## Documentation
 Full documentation at: https://ggozad.github.io/haiku.rag/

{haiku_rag-0.11.4 → haiku_rag-0.12.0}/mkdocs.yml RENAMED Viewed

@@ -64,6 +64,7 @@ nav:
       - Agents: agents.md
       - Python: python.md
       - MCP: mcp.md
+      - A2A: a2a.md
       - Benchmarks: benchmarks.md
 markdown_extensions:
   - admonition

{haiku_rag-0.11.4 → haiku_rag-0.12.0}/pyproject.toml RENAMED Viewed

@@ -2,7 +2,7 @@
 name = "haiku.rag"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
-version = "0.11.4"
+version = "0.12.0"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -23,23 +23,24 @@ classifiers = [
 ]
 dependencies = [
-    "docling>=2.52.0",
-    "fastmcp>=2.12.3",
+    "docling>=2.56.1",
+    "fastmcp>=2.12.4",
     "httpx>=0.28.1",
-    "lancedb>=0.25.0",
-    "pydantic>=2.11.9",
-    "pydantic-ai>=1.0.8",
-    "pydantic-graph>=1.0.8",
+    "lancedb>=0.25.2",
+    "pydantic>=2.12.1",
+    "pydantic-ai>=1.0.18",
+    "pydantic-graph>=1.0.18",
     "python-dotenv>=1.1.1",
-    "rich>=14.1.0",
-    "tiktoken>=0.11.0",
-    "typer>=0.16.1",
+    "rich>=14.2.0",
+    "tiktoken>=0.12.0",
+    "typer>=0.19.2",
     "watchfiles>=1.1.0",
 ]
 [project.optional-dependencies]
 voyageai = ["voyageai>=0.3.5"]
 mxbai = ["mxbai-rerank>=0.1.6"]
+a2a = ["fasta2a>=0.1.0"]
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"
@@ -49,7 +50,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 [tool.hatch.build]
-exclude = ["/docs", "/tests", "/.github"]
+exclude = ["/docs", "/examples", "/tests", "/.github"]
 [tool.hatch.build.targets.wheel]
 packages = ["src/haiku"]

haiku_rag-0.12.0/server.json ADDED Viewed

@@ -0,0 +1,253 @@
+{
+  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-09-29/server.schema.json",
+  "name": "io.github.ggozad/haiku-rag",
+  "version": "{{VERSION}}",
+  "description": "Agentic Retrieval Augmented Generation (RAG) with LanceDB",
+  "repository": {
+    "url": "https://github.com/ggozad/haiku.rag",
+    "source": "github"
+  },
+  "homepage": "https://github.com/ggozad/haiku.rag",
+  "license": "MIT",
+  "keywords": ["rag", "lancedb", "vector-database", "embeddings", "search", "qa", "research"],
+  "vendor": {
+    "name": "Yiorgis Gozadinos",
+    "url": "https://github.com/ggozad"
+  },
+  "deployment": {
+    "packages": [
+      {
+        "type": "pypi",
+        "package": "haiku.rag",
+        "command": {
+          "linux-x86_64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "darwin-arm64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "darwin-x86_64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "win32-x86_64": {
+            "shell": "uvx.exe",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          }
+        },
+        "environmentVariables": [
+          {
+            "name": "ENV",
+            "description": "Runtime environment (production or development)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "DEFAULT_DATA_DIR",
+            "description": "Default directory for LanceDB data and assets",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "MONITOR_DIRECTORIES",
+            "description": "Comma-separated paths to watch for file changes in server mode",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_URI",
+            "description": "LanceDB connection URI (use db:// for cloud or a filesystem path)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_REGION",
+            "description": "LanceDB cloud region (if using cloud)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_API_KEY",
+            "description": "LanceDB API key (required for LanceDB Cloud)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "EMBEDDINGS_PROVIDER",
+            "description": "Embeddings provider (e.g. ollama, openai, voyageai)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "EMBEDDINGS_MODEL",
+            "description": "Embeddings model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "EMBEDDINGS_VECTOR_DIM",
+            "description": "Embedding vector dimension (must match model)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "QA_PROVIDER",
+            "description": "Question answering provider (e.g. ollama, openai, anthropic)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "QA_MODEL",
+            "description": "Question answering model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RESEARCH_PROVIDER",
+            "description": "Research provider for multi-agent research (e.g. ollama, openai, anthropic)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RESEARCH_MODEL",
+            "description": "Research model name for multi-agent research (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RERANK_PROVIDER",
+            "description": "Rerank provider (e.g. mixedbread, cohere)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RERANK_MODEL",
+            "description": "Rerank model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "CHUNK_SIZE",
+            "description": "Chunk size for splitting documents (characters)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "CONTEXT_CHUNK_RADIUS",
+            "description": "Number of adjacent chunks to include around search hits",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "OLLAMA_BASE_URL",
+            "description": "Base URL for Ollama server",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_EMBEDDINGS_BASE_URL",
+            "description": "Base URL for vLLM embeddings endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_RERANK_BASE_URL",
+            "description": "Base URL for vLLM rerank endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_QA_BASE_URL",
+            "description": "Base URL for vLLM QA endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_RESEARCH_BASE_URL",
+            "description": "Base URL for vLLM research endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "MARKDOWN_PREPROCESSOR",
+            "description": "Dotted path or file path to a callable that preprocesses markdown content before chunking",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "DISABLE_DB_AUTOCREATE",
+            "description": "If true, refuse to auto-create a new LanceDB database or tables",
+            "format": "boolean",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VACUUM_RETENTION_SECONDS",
+            "description": "Vacuum retention threshold in seconds (default: 60)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "OPENAI_API_KEY",
+            "description": "OpenAI API key (if using OpenAI for embeddings or QA)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "VOYAGE_API_KEY",
+            "description": "VoyageAI API key (if using VoyageAI for embeddings)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "ANTHROPIC_API_KEY",
+            "description": "Anthropic API key (if using Anthropic for QA)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "COHERE_API_KEY",
+            "description": "Cohere API key (if using Cohere for reranking)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          }
+        ]
+      }
+    ]
+  },
+  "transports": [
+    {
+      "type": "stdio"
+    }
+  ]
+}

{haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/benchmark.py RENAMED Viewed

@@ -212,8 +212,6 @@ async def run_qa_benchmark(
                 return await qa.answer(question)
             for case in evaluation_dataset.cases:
-                progress.console.print(f"\n[bold]Evaluating case:[/bold] {case.name}")
                 single_case_dataset = EvalDataset[str, str, dict[str, str]](
                     cases=[case],
                     evaluators=evaluation_dataset.evaluators,
@@ -232,32 +230,24 @@ async def run_qa_benchmark(
                     result_case = report.cases[0]
                     equivalence = result_case.assertions.get("answer_equivalent")
-                    progress.console.print(f"Question: {result_case.inputs}")
-                    progress.console.print(f"Expected: {result_case.expected_output}")
-                    progress.console.print(f"Generated: {result_case.output}")
                     if equivalence is not None:
-                        progress.console.print(
-                            f"Equivalent: {equivalence.value}"
-                            + (f" — {equivalence.reason}" if equivalence.reason else "")
-                        )
                         if equivalence.value:
                             passing_cases += 1
-                    progress.console.print("")
                 if report.failures:
                     failures.extend(report.failures)
                     failure = report.failures[0]
                     progress.console.print(
                         "[red]Failure encountered during case evaluation:[/red]"
                     )
-                    progress.console.print(f"Question: {failure.inputs}")
                     progress.console.print(f"Error: {failure.error_message}")
                     progress.console.print("")
-                progress.console.print(
-                    f"[green]Accuracy: {(passing_cases / total_processed):.4f} "
-                    f"{passing_cases}/{total_processed}[/green]"
+                progress.update(
+                    qa_task,
+                    description="[yellow]Evaluating QA cases...[/yellow] "
+                    f"[green]Accuracy: {(passing_cases / total_processed):.2f} "
+                    f"{passing_cases}/{total_processed}[/green]",
                 )
                 progress.advance(qa_task)

haiku_rag-0.12.0/src/haiku/rag/a2a/__init__.py ADDED Viewed

@@ -0,0 +1,176 @@
+import logging
+from contextlib import asynccontextmanager
+from pathlib import Path
+import logfire
+from pydantic_ai import Agent, RunContext
+from haiku.rag.config import Config
+from haiku.rag.graph.common import get_model
+from .context import load_message_history, save_message_history
+from .models import AgentDependencies, SearchResult
+from .prompts import A2A_SYSTEM_PROMPT
+from .skills import extract_question_from_task, get_agent_skills
+from .storage import LRUMemoryStorage
+from .worker import ConversationalWorker
+try:
+    from fasta2a import FastA2A  # type: ignore
+    from fasta2a.broker import InMemoryBroker  # type: ignore
+    from fasta2a.storage import InMemoryStorage  # type: ignore
+except ImportError as e:
+    raise ImportError(
+        "A2A support requires the 'a2a' extra. "
+        "Install with: uv pip install 'haiku.rag[a2a]'"
+    ) from e
+logfire.configure(send_to_logfire="if-token-present", service_name="a2a")
+logfire.instrument_pydantic_ai()
+logger = logging.getLogger(__name__)
+__all__ = [
+    "create_a2a_app",
+    "load_message_history",
+    "save_message_history",
+    "extract_question_from_task",
+    "get_agent_skills",
+    "LRUMemoryStorage",
+]
+def create_a2a_app(
+    db_path: Path,
+    security_schemes: dict | None = None,
+    security: list[dict[str, list[str]]] | None = None,
+):
+    """Create an A2A app for the conversational QA agent.
+    Args:
+        db_path: Path to the LanceDB database
+        security_schemes: Optional security scheme definitions for the AgentCard
+        security: Optional security requirements for the AgentCard
+    Returns:
+        A FastA2A ASGI application
+    """
+    base_storage = InMemoryStorage()
+    storage = LRUMemoryStorage(
+        storage=base_storage, max_contexts=Config.A2A_MAX_CONTEXTS
+    )
+    broker = InMemoryBroker()
+    # Create the agent with native search tool
+    model = get_model(Config.QA_PROVIDER, Config.QA_MODEL)
+    agent = Agent(
+        model=model,
+        deps_type=AgentDependencies,
+        system_prompt=A2A_SYSTEM_PROMPT,
+        retries=3,
+    )
+    @agent.tool
+    async def search_documents(
+        ctx: RunContext[AgentDependencies],
+        query: str,
+        limit: int = 3,
+    ) -> list[SearchResult]:
+        """Search the knowledge base for relevant documents.
+        Returns chunks of text with their relevance scores and document URIs.
+        Use get_full_document if you need to see the complete document content.
+        """
+        search_results = await ctx.deps.client.search(query, limit=limit)
+        expanded_results = await ctx.deps.client.expand_context(search_results)
+        return [
+            SearchResult(
+                content=chunk.content,
+                score=score,
+                document_title=chunk.document_title,
+                document_uri=(chunk.document_uri or ""),
+            )
+            for chunk, score in expanded_results
+        ]
+    @agent.tool
+    async def get_full_document(
+        ctx: RunContext[AgentDependencies],
+        document_uri: str,
+    ) -> str:
+        """Retrieve the complete content of a document by its URI.
+        Use this when you need more context than what's in a search result chunk.
+        The document_uri comes from search_documents results.
+        """
+        document = await ctx.deps.client.get_document_by_uri(document_uri)
+        if document is None:
+            return f"Document not found: {document_uri}"
+        return document.content
+    worker = ConversationalWorker(
+        storage=storage,
+        broker=broker,
+        db_path=db_path,
+        agent=agent,  # type: ignore
+    )
+    # Create FastA2A app with custom worker lifecycle
+    @asynccontextmanager
+    async def lifespan(app):
+        logger.info(f"Started A2A server (max contexts: {Config.A2A_MAX_CONTEXTS})")
+        async with app.task_manager:
+            async with worker.run():
+                yield
+    app = FastA2A(
+        storage=storage,
+        broker=broker,
+        name="haiku-rag",
+        description="Conversational question answering agent powered by haiku.rag RAG system",
+        skills=get_agent_skills(),
+        lifespan=lifespan,
+    )
+    # Add security configuration if provided
+    if security_schemes or security:
+        # Monkey-patch the agent card endpoint to include security
+        async def _agent_card_endpoint_with_security(request):
+            from fasta2a.schema import AgentCapabilities, AgentCard, agent_card_ta
+            from starlette.responses import Response
+            if app._agent_card_json_schema is None:
+                agent_card = AgentCard(
+                    name=app.name,
+                    description=app.description
+                    or "An AI agent exposed as an A2A agent.",
+                    url=app.url,
+                    version=app.version,
+                    protocol_version="0.3.0",
+                    skills=app.skills,
+                    default_input_modes=app.default_input_modes,
+                    default_output_modes=app.default_output_modes,
+                    capabilities=AgentCapabilities(
+                        streaming=False,
+                        push_notifications=False,
+                        state_transition_history=False,
+                    ),
+                )
+                if app.provider is not None:
+                    agent_card["provider"] = app.provider
+                if security_schemes:
+                    agent_card["security_schemes"] = security_schemes
+                if security:
+                    agent_card["security"] = security
+                app._agent_card_json_schema = agent_card_ta.dump_json(
+                    agent_card, by_alias=True
+                )
+            return Response(
+                content=app._agent_card_json_schema, media_type="application/json"
+            )
+        app._agent_card_endpoint = _agent_card_endpoint_with_security
+    return app

haiku.rag 0.11.4__tar.gz → 0.12.0__tar.gz

Potentially problematic release.

haiku.rag 0.11.4__tar.gz → 0.12.0__tar.gz