PyPI - ragit - Versions diffs - 0.8.2__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

ragit 0.8.2__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

ragit/__init__.py +27 -15
ragit/assistant.py +431 -40
ragit/config.py +165 -22
ragit/core/experiment/experiment.py +7 -1
ragit/exceptions.py +271 -0
ragit/loaders.py +200 -44
ragit/logging.py +194 -0
ragit/monitor.py +307 -0
ragit/providers/__init__.py +1 -13
ragit/providers/ollama.py +379 -121
ragit/utils/__init__.py +0 -22
ragit/version.py +1 -1
{ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/METADATA +48 -25
ragit-0.11.0.dist-info/RECORD +22 -0
{ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/WHEEL +1 -1
ragit/providers/sentence_transformers.py +0 -225
ragit-0.8.2.dist-info/RECORD +0 -20
{ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/licenses/LICENSE +0 -0
{ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/top_level.txt +0 -0

ragit/config.py CHANGED Viewed

@@ -3,9 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 """
-Ragit configuration management.
+Ragit configuration management with Pydantic validation.
 Loads configuration from environment variables and .env files.
+Validates all configuration values at startup.
 Note: As of v0.8.0, ragit no longer has default LLM or embedding models.
 Users must explicitly configure providers.
@@ -15,6 +16,10 @@ import os
 from pathlib import Path
 from dotenv import load_dotenv
+from pydantic import BaseModel, Field, field_validator
+# Note: We define ConfigValidationError locally to avoid circular imports,
+# but ragit.exceptions.ConfigurationError can be used elsewhere
 # Load .env file from current working directory or project root
 _env_path = Path.cwd() / ".env"
@@ -29,32 +34,170 @@ else:
             break
-class Config:
-    """Ragit configuration loaded from environment variables.
+class ConfigValidationError(Exception):
+    """Raised when configuration validation fails."""
+    pass
+class RagitConfig(BaseModel):
+    """Validated ragit configuration.
-    Note: As of v0.8.0, DEFAULT_LLM_MODEL and DEFAULT_EMBEDDING_MODEL are
-    no longer used as defaults. They are only read from environment variables
-    for backwards compatibility with user configurations.
+    All configuration values are validated at startup. Invalid values
+    raise ConfigValidationError with a descriptive message.
+    Attributes
+    ----------
+    ollama_base_url : str
+        Ollama server URL (default: http://localhost:11434)
+    ollama_embedding_url : str
+        Embedding API URL (defaults to ollama_base_url)
+    ollama_api_key : str | None
+        API key for authentication
+    ollama_timeout : int
+        Request timeout in seconds (1-600)
+    default_llm_model : str | None
+        Default LLM model name
+    default_embedding_model : str | None
+        Default embedding model name
+    log_level : str
+        Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
     """
-    # Ollama LLM API Configuration (used when explicitly using OllamaProvider)
-    OLLAMA_BASE_URL: str = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
-    OLLAMA_API_KEY: str | None = os.getenv("OLLAMA_API_KEY")
-    OLLAMA_TIMEOUT: int = int(os.getenv("OLLAMA_TIMEOUT", "120"))
+    ollama_base_url: str = Field(default="http://localhost:11434")
+    ollama_embedding_url: str | None = None
+    ollama_api_key: str | None = None
+    ollama_timeout: int = Field(default=120, gt=0, le=600)
+    default_llm_model: str | None = None
+    default_embedding_model: str | None = None
+    log_level: str = Field(default="INFO")
+    @field_validator("ollama_base_url", "ollama_embedding_url", mode="before")
+    @classmethod
+    def validate_url(cls, v: str | None) -> str | None:
+        """Validate URL format."""
+        if v is None:
+            return v
+        v = str(v).strip().rstrip("/")
+        if not v:
+            return None
+        if not v.startswith(("http://", "https://")):
+            raise ValueError(f"URL must start with http:// or https://: {v}")
+        return v
+    @field_validator("log_level", mode="before")
+    @classmethod
+    def validate_log_level(cls, v: str) -> str:
+        """Validate log level is a valid Python logging level."""
+        valid_levels = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
+        v = str(v).upper().strip()
+        if v not in valid_levels:
+            raise ValueError(f"Invalid log level: {v}. Must be one of {valid_levels}")
+        return v
+    @field_validator("ollama_api_key", mode="before")
+    @classmethod
+    def validate_api_key(cls, v: str | None) -> str | None:
+        """Treat empty string as None."""
+        if v is not None and not str(v).strip():
+            return None
+        return v
+    @field_validator("ollama_timeout", mode="before")
+    @classmethod
+    def validate_timeout(cls, v: int | str) -> int:
+        """Parse and validate timeout value."""
+        try:
+            timeout = int(v)
+        except (ValueError, TypeError) as e:
+            raise ValueError(f"Invalid timeout value '{v}': must be an integer") from e
+        return timeout
+    model_config = {"extra": "forbid"}
+    # Uppercase aliases for backwards compatibility
+    @property
+    def OLLAMA_BASE_URL(self) -> str:
+        return self.ollama_base_url
+    @property
+    def OLLAMA_EMBEDDING_URL(self) -> str:
+        return self.ollama_embedding_url or self.ollama_base_url
+    @property
+    def OLLAMA_API_KEY(self) -> str | None:
+        return self.ollama_api_key
-    # Ollama Embedding API Configuration
-    OLLAMA_EMBEDDING_URL: str = os.getenv(
-        "OLLAMA_EMBEDDING_URL", os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
-    )
+    @property
+    def OLLAMA_TIMEOUT(self) -> int:
+        return self.ollama_timeout
+    @property
+    def DEFAULT_LLM_MODEL(self) -> str | None:
+        return self.default_llm_model
+    @property
+    def DEFAULT_EMBEDDING_MODEL(self) -> str | None:
+        return self.default_embedding_model
+    @property
+    def LOG_LEVEL(self) -> str:
+        return self.log_level
+def _safe_get_env(key: str, default: str | None = None) -> str | None:
+    """Get environment variable, returning None for empty strings."""
+    value = os.getenv(key, default)
+    if value is not None and not value.strip():
+        return default
+    return value
+def _safe_get_int_env(key: str, default: int) -> int:
+    """Get environment variable as int, raising on invalid values."""
+    value = os.getenv(key)
+    if value is None:
+        return default
+    try:
+        return int(value)
+    except ValueError:
+        raise ConfigValidationError(f"Invalid integer value for {key}: {value!r}") from None
+def load_config() -> RagitConfig:
+    """Load and validate configuration from environment variables.
+    Returns
+    -------
+    RagitConfig
+        Validated configuration object.
+    Raises
+    ------
+    ConfigValidationError
+        If configuration validation fails.
+    """
+    try:
+        return RagitConfig(
+            ollama_base_url=_safe_get_env("OLLAMA_BASE_URL", "http://localhost:11434") or "http://localhost:11434",
+            ollama_embedding_url=_safe_get_env("OLLAMA_EMBEDDING_URL") or _safe_get_env("OLLAMA_BASE_URL"),
+            ollama_api_key=_safe_get_env("OLLAMA_API_KEY"),
+            ollama_timeout=_safe_get_int_env("OLLAMA_TIMEOUT", 120),
+            default_llm_model=_safe_get_env("RAGIT_DEFAULT_LLM_MODEL"),
+            default_embedding_model=_safe_get_env("RAGIT_DEFAULT_EMBEDDING_MODEL"),
+            log_level=_safe_get_env("RAGIT_LOG_LEVEL", "INFO") or "INFO",
+        )
+    except Exception as e:
+        raise ConfigValidationError(f"Configuration error: {e}") from e
-    # Model settings (only used if explicitly requested, no defaults)
-    # These can still be set via environment variables for convenience
-    DEFAULT_LLM_MODEL: str | None = os.getenv("RAGIT_DEFAULT_LLM_MODEL")
-    DEFAULT_EMBEDDING_MODEL: str | None = os.getenv("RAGIT_DEFAULT_EMBEDDING_MODEL")
-    # Logging
-    LOG_LEVEL: str = os.getenv("RAGIT_LOG_LEVEL", "INFO")
+# Singleton instance - validates configuration at import time
+try:
+    config = load_config()
+except ConfigValidationError as e:
+    # Re-raise with clear message
+    raise ConfigValidationError(str(e)) from e
-# Singleton instance
-config = Config()
+# Backwards compatibility alias
+Config = RagitConfig

ragit/core/experiment/experiment.py CHANGED Viewed

@@ -45,7 +45,13 @@ class Document:
 @dataclass
 class Chunk:
-    """A document chunk."""
+    """A document chunk with optional rich metadata.
+    Metadata can include:
+    - document_id: SHA256 hash for deduplication and window search
+    - sequence_number: Order within the document
+    - chunk_start/chunk_end: Character positions in original text
+    """
     content: str
     doc_id: str

ragit/exceptions.py ADDED Viewed

@@ -0,0 +1,271 @@
+#
+# Copyright RODMENA LIMITED 2025
+# SPDX-License-Identifier: Apache-2.0
+#
+"""
+Custom exception hierarchy for ragit.
+Provides structured exceptions for different failure types,
+enabling better error handling and debugging.
+Pattern inspired by ai4rag exception_handler.py.
+"""
+from typing import Any
+class RagitError(Exception):
+    """Base exception for all ragit errors.
+    All ragit-specific exceptions inherit from this class,
+    making it easy to catch all ragit errors with a single handler.
+    Parameters
+    ----------
+    message : str
+        Human-readable error message.
+    original_exception : BaseException, optional
+        The underlying exception that caused this error.
+    Examples
+    --------
+    >>> try:
+    ...     provider.embed("text", "model")
+    ... except RagitError as e:
+    ...     print(f"Ragit error: {e}")
+    ...     if e.original_exception:
+    ...         print(f"Caused by: {e.original_exception}")
+    """
+    def __init__(self, message: str, original_exception: BaseException | None = None):
+        self.message = message
+        self.original_exception = original_exception
+        super().__init__(self._format_message())
+    def _format_message(self) -> str:
+        """Format the error message, including original exception if present."""
+        if self.original_exception:
+            return f"{self.message}: {self.original_exception}"
+        return self.message
+class ConfigurationError(RagitError):
+    """Configuration validation or loading failed.
+    Raised when:
+    - Environment variables have invalid values
+    - Required configuration is missing
+    - URL formats are invalid
+    """
+    pass
+class ProviderError(RagitError):
+    """Provider communication or operation failed.
+    Raised when:
+    - Network connection to provider fails
+    - Provider returns an error response
+    - Provider timeout occurs
+    """
+    pass
+class IndexingError(RagitError):
+    """Document indexing or embedding failed.
+    Raised when:
+    - Embedding generation fails
+    - Document chunking fails
+    - Index building fails
+    """
+    pass
+class RetrievalError(RagitError):
+    """Retrieval operation failed.
+    Raised when:
+    - Query embedding fails
+    - Search operation fails
+    - No results can be retrieved
+    """
+    pass
+class GenerationError(RagitError):
+    """LLM generation failed.
+    Raised when:
+    - LLM call fails
+    - Response parsing fails
+    - Context exceeds model limits
+    """
+    pass
+class EvaluationError(RagitError):
+    """Evaluation or scoring failed.
+    Raised when:
+    - Metric calculation fails
+    - Benchmark validation fails
+    - Score extraction fails
+    """
+    pass
+class ExceptionAggregator:
+    """Collect and report exceptions during batch operations.
+    Useful for operations that should continue even when some
+    items fail, then report all failures at the end.
+    Pattern from ai4rag exception_handler.py.
+    Examples
+    --------
+    >>> aggregator = ExceptionAggregator()
+    >>> for doc in documents:
+    ...     try:
+    ...         process(doc)
+    ...     except Exception as e:
+    ...         aggregator.record(f"doc:{doc.id}", e)
+    >>> if aggregator.has_errors:
+    ...     print(aggregator.get_summary())
+    """
+    def __init__(self) -> None:
+        self._exceptions: list[tuple[str, Exception]] = []
+    def record(self, context: str, exception: Exception) -> None:
+        """Record an exception with context.
+        Parameters
+        ----------
+        context : str
+            Description of where/why the exception occurred.
+        exception : Exception
+            The exception that was raised.
+        """
+        self._exceptions.append((context, exception))
+    @property
+    def has_errors(self) -> bool:
+        """Check if any errors have been recorded."""
+        return len(self._exceptions) > 0
+    @property
+    def error_count(self) -> int:
+        """Get the number of recorded errors."""
+        return len(self._exceptions)
+    @property
+    def exceptions(self) -> list[tuple[str, Exception]]:
+        """Get all recorded exceptions with their contexts."""
+        return list(self._exceptions)
+    def get_by_type(self, exc_type: type[Exception]) -> list[tuple[str, Exception]]:
+        """Get exceptions of a specific type.
+        Parameters
+        ----------
+        exc_type : type
+            The exception type to filter by.
+        Returns
+        -------
+        list[tuple[str, Exception]]
+            Exceptions matching the type with their contexts.
+        """
+        return [(ctx, exc) for ctx, exc in self._exceptions if isinstance(exc, exc_type)]
+    def get_summary(self) -> str:
+        """Get a summary of all recorded errors.
+        Returns
+        -------
+        str
+            Human-readable summary of errors.
+        """
+        if not self._exceptions:
+            return "No errors recorded"
+        # Group by exception type
+        by_type: dict[str, int] = {}
+        for _, exc in self._exceptions:
+            exc_type = type(exc).__name__
+            by_type[exc_type] = by_type.get(exc_type, 0) + 1
+        most_common = max(by_type.items(), key=lambda x: x[1])
+        type_summary = ", ".join(f"{t}:{c}" for t, c in sorted(by_type.items(), key=lambda x: -x[1]))
+        return f"{self.error_count} errors ({type_summary}). Most common: {most_common[0]} ({most_common[1]}x)"
+    def get_details(self) -> str:
+        """Get detailed information about all errors.
+        Returns
+        -------
+        str
+            Detailed error information with contexts.
+        """
+        if not self._exceptions:
+            return "No errors recorded"
+        lines = [f"Total errors: {self.error_count}", ""]
+        for i, (context, exc) in enumerate(self._exceptions, 1):
+            lines.append(f"{i}. [{context}] {type(exc).__name__}: {exc}")
+        return "\n".join(lines)
+    def raise_if_errors(self, message: str = "Operation failed") -> None:
+        """Raise RagitError if any errors were recorded.
+        Parameters
+        ----------
+        message : str
+            Base message for the raised error.
+        Raises
+        ------
+        RagitError
+            If any errors were recorded.
+        """
+        if self.has_errors:
+            raise RagitError(f"{message}: {self.get_summary()}")
+    def clear(self) -> None:
+        """Clear all recorded exceptions."""
+        self._exceptions.clear()
+    def merge_from(self, other: "ExceptionAggregator") -> None:
+        """Merge exceptions from another aggregator.
+        Parameters
+        ----------
+        other : ExceptionAggregator
+            Another aggregator to merge from.
+        """
+        self._exceptions.extend(other._exceptions)
+    def to_dict(self) -> dict[str, Any]:
+        """Export as dictionary for JSON serialization.
+        Returns
+        -------
+        dict
+            Dictionary representation of aggregated errors.
+        """
+        return {
+            "error_count": self.error_count,
+            "errors": [
+                {"context": ctx, "type": type(exc).__name__, "message": str(exc)} for ctx, exc in self._exceptions
+            ],
+        }

ragit 0.8.2__py3-none-any.whl → 0.11.0__py3-none-any.whl

ragit 0.8.2__py3-none-any.whl → 0.11.0__py3-none-any.whl