haiku.rag-slim 0.16.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of haiku.rag-slim has been flagged as possibly problematic.

Files changed (71)
  1. haiku/rag/__init__.py +0 -0
  2. haiku/rag/app.py +542 -0
  3. haiku/rag/chunker.py +65 -0
  4. haiku/rag/cli.py +466 -0
  5. haiku/rag/client.py +731 -0
  6. haiku/rag/config/__init__.py +74 -0
  7. haiku/rag/config/loader.py +94 -0
  8. haiku/rag/config/models.py +99 -0
  9. haiku/rag/embeddings/__init__.py +49 -0
  10. haiku/rag/embeddings/base.py +25 -0
  11. haiku/rag/embeddings/ollama.py +28 -0
  12. haiku/rag/embeddings/openai.py +26 -0
  13. haiku/rag/embeddings/vllm.py +29 -0
  14. haiku/rag/embeddings/voyageai.py +27 -0
  15. haiku/rag/graph/__init__.py +26 -0
  16. haiku/rag/graph/agui/__init__.py +53 -0
  17. haiku/rag/graph/agui/cli_renderer.py +135 -0
  18. haiku/rag/graph/agui/emitter.py +197 -0
  19. haiku/rag/graph/agui/events.py +254 -0
  20. haiku/rag/graph/agui/server.py +310 -0
  21. haiku/rag/graph/agui/state.py +34 -0
  22. haiku/rag/graph/agui/stream.py +86 -0
  23. haiku/rag/graph/common/__init__.py +5 -0
  24. haiku/rag/graph/common/models.py +42 -0
  25. haiku/rag/graph/common/nodes.py +265 -0
  26. haiku/rag/graph/common/prompts.py +46 -0
  27. haiku/rag/graph/common/utils.py +44 -0
  28. haiku/rag/graph/deep_qa/__init__.py +1 -0
  29. haiku/rag/graph/deep_qa/dependencies.py +27 -0
  30. haiku/rag/graph/deep_qa/graph.py +243 -0
  31. haiku/rag/graph/deep_qa/models.py +20 -0
  32. haiku/rag/graph/deep_qa/prompts.py +59 -0
  33. haiku/rag/graph/deep_qa/state.py +56 -0
  34. haiku/rag/graph/research/__init__.py +3 -0
  35. haiku/rag/graph/research/common.py +87 -0
  36. haiku/rag/graph/research/dependencies.py +151 -0
  37. haiku/rag/graph/research/graph.py +295 -0
  38. haiku/rag/graph/research/models.py +166 -0
  39. haiku/rag/graph/research/prompts.py +107 -0
  40. haiku/rag/graph/research/state.py +85 -0
  41. haiku/rag/logging.py +56 -0
  42. haiku/rag/mcp.py +245 -0
  43. haiku/rag/monitor.py +194 -0
  44. haiku/rag/qa/__init__.py +33 -0
  45. haiku/rag/qa/agent.py +93 -0
  46. haiku/rag/qa/prompts.py +60 -0
  47. haiku/rag/reader.py +135 -0
  48. haiku/rag/reranking/__init__.py +63 -0
  49. haiku/rag/reranking/base.py +13 -0
  50. haiku/rag/reranking/cohere.py +34 -0
  51. haiku/rag/reranking/mxbai.py +28 -0
  52. haiku/rag/reranking/vllm.py +44 -0
  53. haiku/rag/reranking/zeroentropy.py +59 -0
  54. haiku/rag/store/__init__.py +4 -0
  55. haiku/rag/store/engine.py +309 -0
  56. haiku/rag/store/models/__init__.py +4 -0
  57. haiku/rag/store/models/chunk.py +17 -0
  58. haiku/rag/store/models/document.py +17 -0
  59. haiku/rag/store/repositories/__init__.py +9 -0
  60. haiku/rag/store/repositories/chunk.py +442 -0
  61. haiku/rag/store/repositories/document.py +261 -0
  62. haiku/rag/store/repositories/settings.py +165 -0
  63. haiku/rag/store/upgrades/__init__.py +62 -0
  64. haiku/rag/store/upgrades/v0_10_1.py +64 -0
  65. haiku/rag/store/upgrades/v0_9_3.py +112 -0
  66. haiku/rag/utils.py +211 -0
  67. haiku_rag_slim-0.16.0.dist-info/METADATA +128 -0
  68. haiku_rag_slim-0.16.0.dist-info/RECORD +71 -0
  69. haiku_rag_slim-0.16.0.dist-info/WHEEL +4 -0
  70. haiku_rag_slim-0.16.0.dist-info/entry_points.txt +2 -0
  71. haiku_rag_slim-0.16.0.dist-info/licenses/LICENSE +7 -0
haiku/rag/reranking/vllm.py
@@ -0,0 +1,44 @@
+ import httpx
+
+ from haiku.rag.config import Config
+ from haiku.rag.reranking.base import RerankerBase
+ from haiku.rag.store.models.chunk import Chunk
+
+
+ class VLLMReranker(RerankerBase):
+     def __init__(self, model: str):
+         self._model = model
+         self._base_url = Config.providers.vllm.rerank_base_url
+
+     async def rerank(
+         self, query: str, chunks: list[Chunk], top_n: int = 10
+     ) -> list[tuple[Chunk, float]]:
+         if not chunks:
+             return []
+
+         # Prepare documents for reranking
+         documents = [chunk.content for chunk in chunks]
+
+         async with httpx.AsyncClient() as client:
+             response = await client.post(
+                 f"{self._base_url}/v1/rerank",
+                 json={"model": self._model, "query": query, "documents": documents},
+                 headers={
+                     "accept": "application/json",
+                     "Content-Type": "application/json",
+                 },
+             )
+             response.raise_for_status()
+
+             result = response.json()
+
+         # Extract scores and pair with chunks
+         scored_chunks = []
+         for item in result.get("results", []):
+             index = item["index"]
+             score = item["relevance_score"]
+             scored_chunks.append((chunks[index], score))
+
+         # Sort by score (descending) and return top_n
+         scored_chunks.sort(key=lambda x: x[1], reverse=True)
+         return scored_chunks[:top_n]
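
For orientation, a minimal usage sketch of the reranker above, assuming a vLLM server exposes the /v1/rerank endpoint at the configured Config.providers.vllm.rerank_base_url; the model name and chunk texts are placeholders, not project defaults:

import asyncio

from haiku.rag.reranking.vllm import VLLMReranker
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    # Hypothetical chunks; in practice these come from the store's search results.
    chunks = [
        Chunk(content="LanceDB is an embedded vector database."),
        Chunk(content="A haiku is a three-line poem."),
    ]
    reranker = VLLMReranker(model="BAAI/bge-reranker-v2-m3")  # placeholder model name
    ranked = await reranker.rerank("what is lancedb?", chunks, top_n=1)
    for chunk, score in ranked:
        print(f"{score:.3f}  {chunk.content}")


asyncio.run(main())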
haiku/rag/reranking/zeroentropy.py
@@ -0,0 +1,59 @@
+ from zeroentropy import ZeroEntropy
+
+ from haiku.rag.reranking.base import RerankerBase
+ from haiku.rag.store.models.chunk import Chunk
+
+
+ class ZeroEntropyReranker(RerankerBase):
+     """Zero Entropy reranker implementation using the zerank-1 model."""
+
+     def __init__(self, model: str = "zerank-1"):
+         """Initialize the Zero Entropy reranker.
+
+         Args:
+             model: The Zero Entropy model to use (default: "zerank-1")
+         """
+         self._model = model
+         # Zero Entropy SDK reads ZEROENTROPY_API_KEY from environment by default
+         self._client = ZeroEntropy()
+
+     async def rerank(
+         self, query: str, chunks: list[Chunk], top_n: int = 10
+     ) -> list[tuple[Chunk, float]]:
+         """Rerank the given chunks based on relevance to the query.
+
+         Args:
+             query: The query to rank against
+             chunks: The chunks to rerank
+             top_n: The number of top results to return
+
+         Returns:
+             A list of (chunk, score) tuples, sorted by relevance
+         """
+         if not chunks:
+             return []
+
+         # Prepare documents for Zero Entropy API
+         documents = [chunk.content for chunk in chunks]
+
+         # Call Zero Entropy reranking API
+         response = self._client.models.rerank(
+             model=self._model,
+             query=query,
+             documents=documents,
+         )
+
+         # Extract results and map back to chunks
+         # Zero Entropy returns results sorted by relevance with scores
+         reranked_results = []
+
+         # Get top_n results
+         for i, result in enumerate(response.results[:top_n]):
+             # Zero Entropy returns index and score for each document
+             chunk_index = result.index
+             score = result.relevance_score
+
+             if chunk_index < len(chunks):
+                 reranked_results.append((chunks[chunk_index], score))
+
+         return reranked_results
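
The ZeroEntropy variant follows the same contract; the sketch below assumes ZEROENTROPY_API_KEY is set in the environment, since the constructor above relies on the SDK reading it implicitly. Note that if ZeroEntropy is the SDK's synchronous client, the rerank() call above blocks the event loop while the request is in flight.

import asyncio

from haiku.rag.reranking.zeroentropy import ZeroEntropyReranker
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    # Placeholder passages to rank against a query.
    chunks = [Chunk(content=text) for text in ("first passage", "second passage")]
    reranker = ZeroEntropyReranker()  # defaults to the "zerank-1" model
    for chunk, score in await reranker.rerank("example query", chunks, top_n=2):
        print(f"{score:.3f}  {chunk.content}")


asyncio.run(main())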
haiku/rag/store/__init__.py
@@ -0,0 +1,4 @@
+ from .engine import Store
+ from .models import Chunk, Document
+
+ __all__ = ["Store", "Chunk", "Document"]
haiku/rag/store/engine.py
@@ -0,0 +1,309 @@
+ import asyncio
+ import json
+ import logging
+ from datetime import timedelta
+ from importlib import metadata
+ from pathlib import Path
+ from uuid import uuid4
+
+ import lancedb
+ from lancedb.pydantic import LanceModel, Vector
+ from pydantic import Field
+
+ from haiku.rag.config import AppConfig, Config
+ from haiku.rag.embeddings import get_embedder
+
+ logger = logging.getLogger(__name__)
+
+
+ class DocumentRecord(LanceModel):
+     id: str = Field(default_factory=lambda: str(uuid4()))
+     content: str
+     uri: str | None = None
+     title: str | None = None
+     metadata: str = Field(default="{}")
+     created_at: str = Field(default_factory=lambda: "")
+     updated_at: str = Field(default_factory=lambda: "")
+
+
+ def create_chunk_model(vector_dim: int):
+     """Create a ChunkRecord model with the specified vector dimension.
+
+     This creates a model with proper vector typing for LanceDB.
+     """
+
+     class ChunkRecord(LanceModel):
+         id: str = Field(default_factory=lambda: str(uuid4()))
+         document_id: str
+         content: str
+         metadata: str = Field(default="{}")
+         order: int = Field(default=0)
+         vector: Vector(vector_dim) = Field(default_factory=lambda: [0.0] * vector_dim)  # type: ignore
+
+     return ChunkRecord
+
+
+ class SettingsRecord(LanceModel):
+     id: str = Field(default="settings")
+     settings: str = Field(default="{}")
+
+
+ class Store:
+     def __init__(
+         self,
+         db_path: Path,
+         config: AppConfig = Config,
+         skip_validation: bool = False,
+         allow_create: bool = True,
+     ):
+         self.db_path: Path = db_path
+         self._config = config
+         self.embedder = get_embedder(config=self._config)
+         self._vacuum_lock = asyncio.Lock()
+
+         # Create the ChunkRecord model with the correct vector dimension
+         self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
+
+         # Local filesystem handling for DB directory
+         if not self._has_cloud_config():
+             if not allow_create:
+                 # Read operations should not create the database
+                 if not db_path.exists():
+                     raise FileNotFoundError(
+                         f"Database does not exist: {db_path}. Use a write operation (add, add-src) to create it."
+                     )
+             else:
+                 # Write operations - ensure parent directories exist
+                 if not db_path.parent.exists():
+                     Path.mkdir(db_path.parent, parents=True)
+
+         # Connect to LanceDB
+         self.db = self._connect_to_lancedb(db_path)
+
+         # Initialize tables
+         self.create_or_update_db()
+
+         # Validate config compatibility after connection is established
+         if not skip_validation:
+             self._validate_configuration()
+
+     async def vacuum(self, retention_seconds: int | None = None) -> None:
+         """Optimize and clean up old versions across all tables to reduce disk usage.
+
+         Args:
+             retention_seconds: Retention threshold in seconds. Only versions older
+                 than this will be removed. If None, uses config.storage.vacuum_retention_seconds.
+
+         Note:
+             If vacuum is already running, this method returns immediately without blocking.
+             Use asyncio.create_task(store.vacuum()) for non-blocking background execution.
+         """
+         if self._has_cloud_config() and str(self._config.lancedb.uri).startswith(
+             "db://"
+         ):
+             return
+
+         # Skip if already running (non-blocking)
+         if self._vacuum_lock.locked():
+             return
+
+         async with self._vacuum_lock:
+             try:
+                 # Evaluate config at runtime to allow dynamic changes
+                 if retention_seconds is None:
+                     retention_seconds = self._config.storage.vacuum_retention_seconds
+                 # Perform maintenance per table using optimize() with configurable retention
+                 retention = timedelta(seconds=retention_seconds)
+                 for table in [
+                     self.documents_table,
+                     self.chunks_table,
+                     self.settings_table,
+                 ]:
+                     table.optimize(cleanup_older_than=retention)
+             except (RuntimeError, OSError) as e:
+                 # Handle resource errors gracefully
+                 logger.debug(f"Vacuum skipped due to resource constraints: {e}")
+
+     def _connect_to_lancedb(self, db_path: Path):
+         """Establish connection to LanceDB (local, cloud, or object storage)."""
+         # Check if we have cloud configuration
+         if self._has_cloud_config():
+             return lancedb.connect(
+                 uri=self._config.lancedb.uri,
+                 api_key=self._config.lancedb.api_key,
+                 region=self._config.lancedb.region,
+             )
+         else:
+             # Local file system connection
+             return lancedb.connect(db_path)
+
+     def _has_cloud_config(self) -> bool:
+         """Check if cloud configuration is complete."""
+         return bool(
+             self._config.lancedb.uri
+             and self._config.lancedb.api_key
+             and self._config.lancedb.region
+         )
+
+     def _validate_configuration(self) -> None:
+         """Validate that the configuration is compatible with the database."""
+         from haiku.rag.store.repositories.settings import SettingsRepository
+
+         settings_repo = SettingsRepository(self)
+         settings_repo.validate_config_compatibility()
+
+     def create_or_update_db(self):
+         """Create the database tables."""
+
+         # Get list of existing tables
+         existing_tables = self.db.table_names()
+
+         # Create or get documents table
+         if "documents" in existing_tables:
+             self.documents_table = self.db.open_table("documents")
+         else:
+             self.documents_table = self.db.create_table(
+                 "documents", schema=DocumentRecord
+             )
+
+         # Create or get chunks table
+         if "chunks" in existing_tables:
+             self.chunks_table = self.db.open_table("chunks")
+         else:
+             self.chunks_table = self.db.create_table("chunks", schema=self.ChunkRecord)
+             # Create FTS index on the new table with phrase query support
+             self.chunks_table.create_fts_index(
+                 "content", replace=True, with_position=True, remove_stop_words=False
+             )
+
+         # Create or get settings table
+         if "settings" in existing_tables:
+             self.settings_table = self.db.open_table("settings")
+         else:
+             self.settings_table = self.db.create_table(
+                 "settings", schema=SettingsRecord
+             )
+             # Save current settings to the new database
+             settings_data = self._config.model_dump(mode="json")
+             self.settings_table.add(
+                 [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
+             )
+
+         # Run pending upgrades based on stored version and package version
+         try:
+             from haiku.rag.store.upgrades import run_pending_upgrades
+
+             current_version = metadata.version("haiku.rag-slim")
+             db_version = self.get_haiku_version()
+
+             if db_version != "0.0.0":
+                 run_pending_upgrades(self, db_version, current_version)
+
+             # After upgrades complete (or if none), set stored version
+             # to the greater of the installed package version and the
+             # highest available upgrade step version in code.
+             try:
+                 from packaging.version import parse as _v
+
+                 from haiku.rag.store.upgrades import upgrades as _steps
+
+                 highest_step = max((_v(u.version) for u in _steps), default=None)
+                 effective_version = (
+                     str(max(_v(current_version), highest_step))
+                     if highest_step is not None
+                     else current_version
+                 )
+             except Exception:
+                 effective_version = current_version
+
+             self.set_haiku_version(effective_version)
+         except Exception as e:
+             # Avoid hard failure on initial connection; log and continue so CLI remains usable.
+             logger.warning(
+                 "Skipping upgrade due to error (db=%s -> pkg=%s): %s",
+                 self.get_haiku_version(),
+                 metadata.version("haiku.rag-slim"),
+                 e,
+             )
+
+     def get_haiku_version(self) -> str:
+         """Returns the user version stored in settings."""
+         settings_records = list(
+             self.settings_table.search().limit(1).to_pydantic(SettingsRecord)
+         )
+         if settings_records:
+             settings = (
+                 json.loads(settings_records[0].settings)
+                 if settings_records[0].settings
+                 else {}
+             )
+             return settings.get("version", "0.0.0")
+         return "0.0.0"
+
+     def set_haiku_version(self, version: str) -> None:
+         """Updates the user version in settings."""
+         settings_records = list(
+             self.settings_table.search().limit(1).to_pydantic(SettingsRecord)
+         )
+         if settings_records:
+             # Only write if version actually changes to avoid creating new table versions
+             current = (
+                 json.loads(settings_records[0].settings)
+                 if settings_records[0].settings
+                 else {}
+             )
+             if current.get("version") != version:
+                 current["version"] = version
+                 self.settings_table.update(
+                     where="id = 'settings'",
+                     values={"settings": json.dumps(current)},
+                 )
+         else:
+             # Create new settings record
+             settings_data = Config.model_dump(mode="json")
+             settings_data["version"] = version
+             self.settings_table.add(
+                 [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
+             )
+
+     def recreate_embeddings_table(self) -> None:
+         """Recreate the chunks table with current vector dimensions."""
+         # Drop and recreate chunks table
+         try:
+             self.db.drop_table("chunks")
+         except Exception:
+             pass
+
+         # Update the ChunkRecord model with new vector dimension
+         self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
+         self.chunks_table = self.db.create_table("chunks", schema=self.ChunkRecord)
+
+         # Create FTS index on the new table with phrase query support
+         self.chunks_table.create_fts_index(
+             "content", replace=True, with_position=True, remove_stop_words=False
+         )
+
+     def close(self):
+         """Close the database connection."""
+         # LanceDB connections are automatically managed
+         pass
+
+     def current_table_versions(self) -> dict[str, int]:
+         """Capture current versions of key tables for rollback using LanceDB's API."""
+         return {
+             "documents": int(self.documents_table.version),
+             "chunks": int(self.chunks_table.version),
+             "settings": int(self.settings_table.version),
+         }
+
+     def restore_table_versions(self, versions: dict[str, int]) -> bool:
+         """Restore tables to the provided versions using LanceDB's API."""
+         self.documents_table.restore(int(versions["documents"]))
+         self.chunks_table.restore(int(versions["chunks"]))
+         self.settings_table.restore(int(versions["settings"]))
+         return True
+
+     @property
+     def _connection(self):
+         """Compatibility property for repositories expecting _connection."""
+         return self
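
A short sketch of opening and maintaining a local store with the engine above; the database path is a placeholder, and it assumes the embedding provider selected in Config is configured, since get_embedder() runs in __init__ and the stored settings are validated against the active config:

import asyncio
from pathlib import Path

from haiku.rag.store import Store


async def main() -> None:
    # Creates ./example.lancedb on first use because allow_create defaults to True.
    store = Store(Path("example.lancedb"))
    print("stored haiku.rag version:", store.get_haiku_version())
    print("table versions:", store.current_table_versions())
    # Reclaim space from old table versions; long-running apps can instead use
    # asyncio.create_task(store.vacuum()) as the docstring suggests.
    await store.vacuum()
    store.close()


asyncio.run(main())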
haiku/rag/store/models/__init__.py
@@ -0,0 +1,4 @@
+ from .chunk import Chunk
+ from .document import Document
+
+ __all__ = ["Chunk", "Document"]
haiku/rag/store/models/chunk.py
@@ -0,0 +1,17 @@
+ from pydantic import BaseModel
+
+
+ class Chunk(BaseModel):
+     """
+     Represents a chunk with content, metadata, and optional document information.
+     """
+
+     id: str | None = None
+     document_id: str | None = None
+     content: str
+     metadata: dict = {}
+     order: int = 0
+     document_uri: str | None = None
+     document_title: str | None = None
+     document_meta: dict = {}
+     embedding: list[float] | None = None
haiku/rag/store/models/document.py
@@ -0,0 +1,17 @@
+ from datetime import datetime
+
+ from pydantic import BaseModel, Field
+
+
+ class Document(BaseModel):
+     """
+     Represents a document with an ID, content, and metadata.
+     """
+
+     id: str | None = None
+     content: str
+     uri: str | None = None
+     title: str | None = None
+     metadata: dict = {}
+     created_at: datetime = Field(default_factory=datetime.now)
+     updated_at: datetime = Field(default_factory=datetime.now)
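
Chunk and Document are plain Pydantic models, so they can be constructed and serialized directly; a minimal illustration with made-up values:

from haiku.rag.store.models.chunk import Chunk
from haiku.rag.store.models.document import Document

doc = Document(content="Full text of the document.", uri="file:///tmp/example.md")
chunk = Chunk(
    content="Full text of the document.",
    document_id=doc.id,  # None until the document is persisted and assigned an id
    metadata={"source": "example"},
)
print(doc.model_dump_json(indent=2))
print(chunk.model_dump())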
haiku/rag/store/repositories/__init__.py
@@ -0,0 +1,9 @@
+ from haiku.rag.store.repositories.chunk import ChunkRepository
+ from haiku.rag.store.repositories.document import DocumentRepository
+ from haiku.rag.store.repositories.settings import SettingsRepository
+
+ __all__ = [
+     "ChunkRepository",
+     "DocumentRepository",
+     "SettingsRepository",
+ ]
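
The repositories are constructed around a Store instance, mirroring how engine.py instantiates SettingsRepository(self) for config validation. A minimal sketch of that pattern; whether ChunkRepository and DocumentRepository accept the store the same way is an assumption, since their code is not part of this excerpt:

from pathlib import Path

from haiku.rag.store import Store
from haiku.rag.store.repositories import SettingsRepository

store = Store(Path("example.lancedb"), skip_validation=True)
# The same check that _validate_configuration() performs internally in engine.py.
SettingsRepository(store).validate_config_compatibility()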