haiku.rag 0.11.3.tar.gz → 0.12.0.tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/.gitignore +4 -0
- haiku_rag-0.12.0/.python-version +1 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/PKG-INFO +31 -10
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/README.md +19 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/mkdocs.yml +1 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/pyproject.toml +12 -11
- haiku_rag-0.12.0/server.json +253 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/benchmark.py +32 -23
- haiku_rag-0.12.0/src/evaluations/prompts.py +22 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/__init__.py +176 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/client.py +271 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/context.py +68 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/models.py +21 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/prompts.py +59 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/skills.py +75 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/storage.py +71 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/worker.py +320 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/app.py +75 -14
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/cli.py +79 -69
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/client.py +10 -4
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/config.py +9 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/mcp.py +99 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/migration.py +3 -3
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/__init__.py +6 -1
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/agent.py +6 -6
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/engine.py +33 -5
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/chunk.py +0 -28
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/document.py +7 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/uv.lock +355 -282
- haiku_rag-0.11.3/.python-version +0 -1
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/LICENSE +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/config.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/repliqa.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/wix.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/llm_judge.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/vllm.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/common.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/analysis.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/plan.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/search.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/dependencies.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/graph.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/nodes.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/state.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/vllm.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/common.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/dependencies.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/graph.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/state.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/stream.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/utils.py +0 -0
haiku_rag-0.12.0/.python-version (new)

````diff
@@ -0,0 +1 @@
+3.13
````
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.11.3
+Version: 0.12.0
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -18,18 +18,20 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
 Requires-Python: >=3.12
-Requires-Dist: docling>=2.
-Requires-Dist: fastmcp>=2.12.
+Requires-Dist: docling>=2.56.1
+Requires-Dist: fastmcp>=2.12.4
 Requires-Dist: httpx>=0.28.1
-Requires-Dist: lancedb>=0.25.
-Requires-Dist: pydantic-ai>=1.0.
-Requires-Dist: pydantic-graph>=1.0.
-Requires-Dist: pydantic>=2.
+Requires-Dist: lancedb>=0.25.2
+Requires-Dist: pydantic-ai>=1.0.18
+Requires-Dist: pydantic-graph>=1.0.18
+Requires-Dist: pydantic>=2.12.1
 Requires-Dist: python-dotenv>=1.1.1
-Requires-Dist: rich>=14.
-Requires-Dist: tiktoken>=0.
-Requires-Dist: typer>=0.
+Requires-Dist: rich>=14.2.0
+Requires-Dist: tiktoken>=0.12.0
+Requires-Dist: typer>=0.19.2
 Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: a2a
+Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: voyageai
@@ -56,6 +58,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 
 ## Quick Start
@@ -181,6 +184,24 @@ haiku-rag serve --stdio
 
 Provides tools for document management and search directly in your AI assistant.
 
+## A2A Agent
+
+Run as a conversational agent with the Agent-to-Agent protocol:
+
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
````
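The CLI flow above is all the release notes document. For programmatic access, here is a minimal sketch that sends one message to a running `haiku-rag serve --a2a` instance over plain HTTP. It assumes the server listens on `http://localhost:8000/` and accepts the standard A2A JSON-RPC `message/send` method (fasta2a implements the A2A protocol); the port, endpoint path, and response shape are assumptions, not details taken from haiku.rag itself.

```python
# Hypothetical sketch of one A2A round-trip, assuming a standard A2A
# JSON-RPC endpoint at http://localhost:8000/ (not confirmed by the diff).
import uuid

import httpx

payload = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "message/send",
    "params": {
        "message": {
            "role": "user",
            "parts": [{"kind": "text", "text": "What documents mention LanceDB?"}],
            "messageId": uuid.uuid4().hex,  # client-generated message id
        }
    },
}

response = httpx.post("http://localhost:8000/", json=payload, timeout=60.0)
response.raise_for_status()
print(response.json())  # task/message object per the A2A spec
```

For interactive use, the bundled `haiku-rag a2aclient` shown above is the supported path; the sketch only illustrates what that client does on the wire.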
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/README.md

````diff
@@ -18,6 +18,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 
 ## Quick Start
@@ -143,6 +144,24 @@ haiku-rag serve --stdio
 
 Provides tools for document management and search directly in your AI assistant.
 
+## A2A Agent
+
+Run as a conversational agent with the Agent-to-Agent protocol:
+
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
````
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/pyproject.toml

````diff
@@ -2,7 +2,7 @@
 
 name = "haiku.rag"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
-version = "0.11.3"
+version = "0.12.0"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -23,23 +23,24 @@ classifiers = [
 ]
 
 dependencies = [
-    "docling>=2.
-    "fastmcp>=2.12.
+    "docling>=2.56.1",
+    "fastmcp>=2.12.4",
     "httpx>=0.28.1",
-    "lancedb>=0.25.
-    "pydantic>=2.
-    "pydantic-ai>=1.0.
-    "pydantic-graph>=1.0.
+    "lancedb>=0.25.2",
+    "pydantic>=2.12.1",
+    "pydantic-ai>=1.0.18",
+    "pydantic-graph>=1.0.18",
     "python-dotenv>=1.1.1",
-    "rich>=14.
-    "tiktoken>=0.
-    "typer>=0.
+    "rich>=14.2.0",
+    "tiktoken>=0.12.0",
+    "typer>=0.19.2",
     "watchfiles>=1.1.0",
 ]
 
 [project.optional-dependencies]
 voyageai = ["voyageai>=0.3.5"]
 mxbai = ["mxbai-rerank>=0.1.6"]
+a2a = ["fasta2a>=0.1.0"]
 
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"
@@ -49,7 +50,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build]
-exclude = ["/docs", "/tests", "/.github"]
+exclude = ["/docs", "/examples", "/tests", "/.github"]
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/haiku"]
````
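The new `a2a` extra sits alongside the existing `voyageai` and `mxbai` extras, so the optional `fasta2a` dependency is pulled in with the standard extras syntax, e.g. `pip install "haiku.rag[a2a]"` (or `uv pip install "haiku.rag[a2a]"`); the base install is unchanged without it.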
haiku_rag-0.12.0/server.json (new)

````json
{
  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-09-29/server.schema.json",
  "name": "io.github.ggozad/haiku-rag",
  "version": "{{VERSION}}",
  "description": "Agentic Retrieval Augmented Generation (RAG) with LanceDB",
  "repository": { "url": "https://github.com/ggozad/haiku.rag", "source": "github" },
  "homepage": "https://github.com/ggozad/haiku.rag",
  "license": "MIT",
  "keywords": ["rag", "lancedb", "vector-database", "embeddings", "search", "qa", "research"],
  "vendor": { "name": "Yiorgis Gozadinos", "url": "https://github.com/ggozad" },
  "deployment": {
    "packages": [
      {
        "type": "pypi",
        "package": "haiku.rag",
        "command": {
          "linux-x86_64": { "shell": "uvx", "args": ["haiku.rag", "serve", "--stdio"] },
          "darwin-arm64": { "shell": "uvx", "args": ["haiku.rag", "serve", "--stdio"] },
          "darwin-x86_64": { "shell": "uvx", "args": ["haiku.rag", "serve", "--stdio"] },
          "win32-x86_64": { "shell": "uvx.exe", "args": ["haiku.rag", "serve", "--stdio"] }
        },
        "environmentVariables": [
          { "name": "ENV", "description": "Runtime environment (production or development)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "DEFAULT_DATA_DIR", "description": "Default directory for LanceDB data and assets", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "MONITOR_DIRECTORIES", "description": "Comma-separated paths to watch for file changes in server mode", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "LANCEDB_URI", "description": "LanceDB connection URI (use db:// for cloud or a filesystem path)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "LANCEDB_REGION", "description": "LanceDB cloud region (if using cloud)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "LANCEDB_API_KEY", "description": "LanceDB API key (required for LanceDB Cloud)", "format": "string", "isRequired": false, "isSecret": true },
          { "name": "EMBEDDINGS_PROVIDER", "description": "Embeddings provider (e.g. ollama, openai, voyageai)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "EMBEDDINGS_MODEL", "description": "Embeddings model name (provider-specific)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "EMBEDDINGS_VECTOR_DIM", "description": "Embedding vector dimension (must match model)", "format": "number", "isRequired": false, "isSecret": false },
          { "name": "QA_PROVIDER", "description": "Question answering provider (e.g. ollama, openai, anthropic)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "QA_MODEL", "description": "Question answering model name (provider-specific)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "RESEARCH_PROVIDER", "description": "Research provider for multi-agent research (e.g. ollama, openai, anthropic)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "RESEARCH_MODEL", "description": "Research model name for multi-agent research (provider-specific)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "RERANK_PROVIDER", "description": "Rerank provider (e.g. mixedbread, cohere)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "RERANK_MODEL", "description": "Rerank model name (provider-specific)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "CHUNK_SIZE", "description": "Chunk size for splitting documents (characters)", "format": "number", "isRequired": false, "isSecret": false },
          { "name": "CONTEXT_CHUNK_RADIUS", "description": "Number of adjacent chunks to include around search hits", "format": "number", "isRequired": false, "isSecret": false },
          { "name": "OLLAMA_BASE_URL", "description": "Base URL for Ollama server", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "VLLM_EMBEDDINGS_BASE_URL", "description": "Base URL for vLLM embeddings endpoint", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "VLLM_RERANK_BASE_URL", "description": "Base URL for vLLM rerank endpoint", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "VLLM_QA_BASE_URL", "description": "Base URL for vLLM QA endpoint", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "VLLM_RESEARCH_BASE_URL", "description": "Base URL for vLLM research endpoint", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "MARKDOWN_PREPROCESSOR", "description": "Dotted path or file path to a callable that preprocesses markdown content before chunking", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "DISABLE_DB_AUTOCREATE", "description": "If true, refuse to auto-create a new LanceDB database or tables", "format": "boolean", "isRequired": false, "isSecret": false },
          { "name": "VACUUM_RETENTION_SECONDS", "description": "Vacuum retention threshold in seconds (default: 60)", "format": "number", "isRequired": false, "isSecret": false },
          { "name": "OPENAI_API_KEY", "description": "OpenAI API key (if using OpenAI for embeddings or QA)", "format": "string", "isRequired": false, "isSecret": true },
          { "name": "VOYAGE_API_KEY", "description": "VoyageAI API key (if using VoyageAI for embeddings)", "format": "string", "isRequired": false, "isSecret": true },
          { "name": "ANTHROPIC_API_KEY", "description": "Anthropic API key (if using Anthropic for QA)", "format": "string", "isRequired": false, "isSecret": true },
          { "name": "COHERE_API_KEY", "description": "Cohere API key (if using Cohere for reranking)", "format": "string", "isRequired": false, "isSecret": true }
        ]
      }
    ]
  },
  "transports": [{ "type": "stdio" }]
}
````
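`server.json` is an MCP registry manifest: it declares how clients should launch the server (`uvx haiku.rag serve --stdio`, per platform, stdio transport only) and enumerates the environment variables that configure it. As a quick illustration of the manifest structure, the sketch below resolves the launch command for the current platform; the key-derivation logic is our own and not part of haiku.rag.

```python
# Illustrative sketch: derive the launch command an MCP client would use
# from the manifest above. Only the manifest structure comes from
# server.json; the platform-key mapping here is our own assumption.
import json
import platform
from pathlib import Path

manifest = json.loads(Path("server.json").read_text())
package = manifest["deployment"]["packages"][0]

system = platform.system().lower().replace("windows", "win32")  # linux / darwin / win32
machine = {"amd64": "x86_64"}.get(platform.machine().lower(), platform.machine().lower())
entry = package["command"].get(f"{system}-{machine}")
if entry:
    print(entry["shell"], *entry["args"])  # e.g. uvx haiku.rag serve --stdio
```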
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/benchmark.py

````diff
@@ -15,6 +15,7 @@ from rich.progress import Progress
 from evaluations.config import DatasetSpec, RetrievalSample
 from evaluations.datasets import DATASETS
 from evaluations.llm_judge import ANSWER_EQUIVALENCE_RUBRIC
+from evaluations.prompts import WIX_SUPPORT_PROMPT
 from haiku.rag import logging  # noqa: F401
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
@@ -61,7 +62,6 @@ async def populate_db(spec: DatasetSpec) -> None:
                 metadata=payload.metadata,
             )
             progress.advance(task)
-    rag.store.vacuum()
 
 
 def _is_relevant_match(retrieved_uri: str | None, sample: RetrievalSample) -> bool:
@@ -80,6 +80,11 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
         3: 0.0,
         5: 0.0,
     }
+    success_totals = {
+        1: 0.0,
+        3: 0.0,
+        5: 0.0,
+    }
     total_queries = 0
 
     with Progress() as progress:
@@ -109,15 +114,16 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
             if retrieved_doc and retrieved_doc.uri:
                 retrieved_uris.append(retrieved_doc.uri)
 
-            # Compute
-            # documents are retrieved within the first K results and
-            # averaging these fractions across all queries.
+            # Compute metrics for each cutoff
             for cutoff in (1, 3, 5):
                 top_k = set(retrieved_uris[:cutoff])
                 relevant = set(sample.expected_uris)
                 if relevant:
                     matched = len(top_k & relevant)
+                    # Recall: fraction of relevant docs retrieved
                     recall_totals[cutoff] += matched / len(relevant)
+                    # Success: binary - did we get at least one relevant doc?
+                    success_totals[cutoff] += 1.0 if matched > 0 else 0.0
 
             progress.advance(task)
 
@@ -129,16 +135,28 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
     recall_at_3 = recall_totals[3] / total_queries
     recall_at_5 = recall_totals[5] / total_queries
 
+    success_at_1 = success_totals[1] / total_queries
+    success_at_3 = success_totals[3] / total_queries
+    success_at_5 = success_totals[5] / total_queries
+
     console.print("\n=== Retrieval Benchmark Results ===", style="bold cyan")
     console.print(f"Total queries: {total_queries}")
-    console.print(
-    console.print(f"Recall@
-    console.print(f"Recall@
+    console.print("\nRecall@K (fraction of relevant docs retrieved):")
+    console.print(f"  Recall@1: {recall_at_1:.4f}")
+    console.print(f"  Recall@3: {recall_at_3:.4f}")
+    console.print(f"  Recall@5: {recall_at_5:.4f}")
+    console.print("\nSuccess@K (queries with at least one relevant doc):")
+    console.print(f"  Success@1: {success_at_1:.4f} ({success_at_1 * 100:.1f}%)")
+    console.print(f"  Success@3: {success_at_3:.4f} ({success_at_3 * 100:.1f}%)")
+    console.print(f"  Success@5: {success_at_5:.4f} ({success_at_5 * 100:.1f}%)")
 
     return {
         "recall@1": recall_at_1,
         "recall@3": recall_at_3,
         "recall@5": recall_at_5,
+        "success@1": success_at_1,
+        "success@3": success_at_3,
+        "success@5": success_at_5,
     }
 
 
@@ -187,14 +205,13 @@ async def run_qa_benchmark(
     )
 
     async with HaikuRAG(spec.db_path) as rag:
-
+        system_prompt = WIX_SUPPORT_PROMPT if spec.key == "wix" else None
+        qa = get_qa_agent(rag, system_prompt=system_prompt)
 
         async def answer_question(question: str) -> str:
            return await qa.answer(question)
 
         for case in evaluation_dataset.cases:
-            progress.console.print(f"\n[bold]Evaluating case:[/bold] {case.name}")
-
             single_case_dataset = EvalDataset[str, str, dict[str, str]](
                 cases=[case],
                 evaluators=evaluation_dataset.evaluators,
@@ -213,32 +230,24 @@ async def run_qa_benchmark(
             result_case = report.cases[0]
 
             equivalence = result_case.assertions.get("answer_equivalent")
-            progress.console.print(f"Question: {result_case.inputs}")
-            progress.console.print(f"Expected: {result_case.expected_output}")
-            progress.console.print(f"Generated: {result_case.output}")
             if equivalence is not None:
-                progress.console.print(
-                    f"Equivalent: {equivalence.value}"
-                    + (f" — {equivalence.reason}" if equivalence.reason else "")
-                )
                 if equivalence.value:
                     passing_cases += 1
 
-            progress.console.print("")
-
             if report.failures:
                 failures.extend(report.failures)
                 failure = report.failures[0]
                 progress.console.print(
                     "[red]Failure encountered during case evaluation:[/red]"
                 )
-                progress.console.print(f"Question: {failure.inputs}")
                 progress.console.print(f"Error: {failure.error_message}")
                 progress.console.print("")
 
-            progress.
-
-
+            progress.update(
+                qa_task,
+                description="[yellow]Evaluating QA cases...[/yellow] "
+                f"[green]Accuracy: {(passing_cases / total_processed):.2f} "
+                f"{passing_cases}/{total_processed}[/green]",
             )
             progress.advance(qa_task)
````
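The distinction this diff introduces is worth pinning down: Recall@K averages the fraction of relevant documents found in the top K results, while Success@K only asks whether at least one relevant document was found. A self-contained toy computation mirroring the loop above (the query results and relevant sets are invented for illustration):

```python
# Toy illustration of the Recall@K vs Success@K metrics added above.
queries = [
    {"retrieved": ["a", "b", "c", "d", "e"], "relevant": {"a", "z"}},
    {"retrieved": ["x", "y", "z", "a", "b"], "relevant": {"q"}},
]

for cutoff in (1, 3, 5):
    recall = success = 0.0
    for q in queries:
        top_k = set(q["retrieved"][:cutoff])
        matched = len(top_k & q["relevant"])
        recall += matched / len(q["relevant"])  # fraction of relevant docs found
        success += 1.0 if matched else 0.0      # at least one relevant doc found
    n = len(queries)
    print(f"Recall@{cutoff}: {recall / n:.2f}  Success@{cutoff}: {success / n:.2f}")
```

With these inputs, Recall@1 is 0.25 but Success@1 is 0.50: the first query finds one of its two relevant documents immediately, so the binary metric credits it in full while recall credits it by half.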
haiku_rag-0.12.0/src/evaluations/prompts.py (new)

````python
WIX_SUPPORT_PROMPT = """
You are a WIX technical support expert helping users with questions about the WIX platform.

Your process:
1. When a user asks a question, use the search_documents tool to find relevant information
2. Search with specific keywords and phrases from the user's question
3. Review the search results and their relevance scores
4. If you need additional context, perform follow-up searches with different keywords
5. Provide a short and to the point comprehensive answer based only on the retrieved documents

Guidelines:
- Base your answers strictly on the provided document content
- Quote or reference specific information when possible
- If multiple documents contain relevant information, synthesize them coherently
- Indicate when information is incomplete or when you need to search for additional context
- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
- For complex questions, consider breaking them down and performing multiple searches
- Stick to the answer, do not elaborate or provide context unless explicitly asked for it.

Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
/no_think
"""
````
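The benchmark wires this prompt into the QA agent via the `system_prompt` keyword shown in the benchmark.py diff. Outside the benchmark, the same pattern would look roughly like the sketch below, assuming `get_qa_agent` is importable from `haiku.rag.qa` (that module's `__init__.py` changed in this release) and that a populated database exists at the placeholder path.

```python
# Sketch following the calls benchmark.py makes; "wix.db" is a placeholder
# path and the haiku.rag.qa import location is an assumption.
import asyncio

from evaluations.prompts import WIX_SUPPORT_PROMPT
from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    async with HaikuRAG("wix.db") as rag:
        qa = get_qa_agent(rag, system_prompt=WIX_SUPPORT_PROMPT)
        print(await qa.answer("How do I connect a custom domain?"))


asyncio.run(main())
```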